summaryrefslogtreecommitdiffstats
path: root/contrib/llvm/lib/Target
diff options
context:
space:
mode:
Diffstat (limited to 'contrib/llvm/lib/Target')
-rw-r--r--contrib/llvm/lib/Target/ARM/ARM.h21
-rw-r--r--contrib/llvm/lib/Target/ARM/ARM.td23
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMAsmPrinter.cpp194
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp522
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMBaseInstrInfo.h161
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMBaseRegisterInfo.cpp287
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMBaseRegisterInfo.h19
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMCodeEmitter.cpp35
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMConstantIslandPass.cpp23
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMConstantPoolValue.cpp317
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMConstantPoolValue.h189
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMExpandPseudoInsts.cpp61
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMFastISel.cpp93
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMFrameLowering.cpp31
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMGlobalMerge.cpp10
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp336
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMISelLowering.cpp1295
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMISelLowering.h47
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMInstrFormats.td341
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMInstrInfo.cpp26
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMInstrInfo.td2748
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMInstrNEON.td153
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMInstrThumb.td489
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMInstrThumb2.td1957
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMInstrVFP.td113
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMLoadStoreOptimizer.cpp49
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMMCInstLower.cpp2
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMRegisterInfo.td24
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMSelectionDAGInfo.cpp15
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMSelectionDAGInfo.h17
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMSubtarget.cpp5
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMSubtarget.h18
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMTargetMachine.cpp91
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMTargetMachine.h16
-rw-r--r--contrib/llvm/lib/Target/ARM/AsmParser/ARMAsmLexer.cpp37
-rw-r--r--contrib/llvm/lib/Target/ARM/AsmParser/ARMAsmParser.cpp4039
-rw-r--r--contrib/llvm/lib/Target/ARM/Disassembler/ARMDisassembler.cpp4455
-rw-r--r--contrib/llvm/lib/Target/ARM/Disassembler/ARMDisassembler.h99
-rw-r--r--contrib/llvm/lib/Target/ARM/Disassembler/ARMDisassemblerCore.cpp3818
-rw-r--r--contrib/llvm/lib/Target/ARM/Disassembler/ARMDisassemblerCore.h336
-rw-r--r--contrib/llvm/lib/Target/ARM/Disassembler/ThumbDisassemblerCore.h2459
-rw-r--r--contrib/llvm/lib/Target/ARM/InstPrinter/ARMInstPrinter.cpp399
-rw-r--r--contrib/llvm/lib/Target/ARM/InstPrinter/ARMInstPrinter.h34
-rw-r--r--contrib/llvm/lib/Target/ARM/InstPrinter/CMakeLists.txt6
-rw-r--r--contrib/llvm/lib/Target/ARM/InstPrinter/Makefile15
-rw-r--r--contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMAddressingModes.h (renamed from contrib/llvm/lib/Target/ARM/ARMAddressingModes.h)104
-rw-r--r--contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.cpp (renamed from contrib/llvm/lib/Target/ARM/ARMAsmBackend.cpp)77
-rw-r--r--contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMBaseInfo.h (renamed from contrib/llvm/lib/Target/ARM/ARMBaseInfo.h)163
-rw-r--r--contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMFixupKinds.h (renamed from contrib/llvm/lib/Target/ARM/ARMFixupKinds.h)0
-rw-r--r--contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMMCAsmInfo.cpp11
-rw-r--r--contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMMCCodeEmitter.cpp (renamed from contrib/llvm/lib/Target/ARM/ARMMCCodeEmitter.cpp)386
-rw-r--r--contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMMCExpr.cpp (renamed from contrib/llvm/lib/Target/ARM/ARMMCExpr.cpp)0
-rw-r--r--contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMMCExpr.h (renamed from contrib/llvm/lib/Target/ARM/ARMMCExpr.h)0
-rw-r--r--contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.cpp179
-rw-r--r--contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.h21
-rw-r--r--contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMMachObjectWriter.cpp (renamed from contrib/llvm/lib/Target/ARM/ARMMachObjectWriter.cpp)5
-rw-r--r--contrib/llvm/lib/Target/ARM/MCTargetDesc/CMakeLists.txt7
-rw-r--r--contrib/llvm/lib/Target/ARM/MCTargetDesc/Makefile16
-rw-r--r--contrib/llvm/lib/Target/ARM/NEONMoveFix.cpp149
-rw-r--r--contrib/llvm/lib/Target/ARM/TargetInfo/ARMTargetInfo.cpp2
-rw-r--r--contrib/llvm/lib/Target/ARM/Thumb1FrameLowering.cpp11
-rw-r--r--contrib/llvm/lib/Target/ARM/Thumb1RegisterInfo.cpp34
-rw-r--r--contrib/llvm/lib/Target/ARM/Thumb2ITBlockPass.cpp21
-rw-r--r--contrib/llvm/lib/Target/ARM/Thumb2InstrInfo.cpp13
-rw-r--r--contrib/llvm/lib/Target/ARM/Thumb2SizeReduction.cpp21
-rw-r--r--contrib/llvm/lib/Target/Alpha/AlphaAsmPrinter.cpp2
-rw-r--r--contrib/llvm/lib/Target/Alpha/AlphaISelDAGToDAG.cpp7
-rw-r--r--contrib/llvm/lib/Target/Alpha/AlphaISelLowering.cpp8
-rw-r--r--contrib/llvm/lib/Target/Alpha/AlphaISelLowering.h2
-rw-r--r--contrib/llvm/lib/Target/Alpha/AlphaInstrInfo.cpp1
-rw-r--r--contrib/llvm/lib/Target/Alpha/AlphaInstrInfo.td2
-rw-r--r--contrib/llvm/lib/Target/Alpha/AlphaRegisterInfo.cpp18
-rw-r--r--contrib/llvm/lib/Target/Alpha/AlphaRegisterInfo.h4
-rw-r--r--contrib/llvm/lib/Target/Alpha/AlphaSubtarget.cpp1
-rw-r--r--contrib/llvm/lib/Target/Alpha/AlphaTargetMachine.cpp12
-rw-r--r--contrib/llvm/lib/Target/Alpha/AlphaTargetMachine.h5
-rw-r--r--contrib/llvm/lib/Target/Alpha/MCTargetDesc/AlphaMCTargetDesc.cpp35
-rw-r--r--contrib/llvm/lib/Target/Alpha/MCTargetDesc/CMakeLists.txt4
-rw-r--r--contrib/llvm/lib/Target/Alpha/MCTargetDesc/Makefile16
-rw-r--r--contrib/llvm/lib/Target/Alpha/TargetInfo/AlphaTargetInfo.cpp2
-rw-r--r--contrib/llvm/lib/Target/Blackfin/BlackfinAsmPrinter.cpp2
-rw-r--r--contrib/llvm/lib/Target/Blackfin/BlackfinFrameLowering.h4
-rw-r--r--contrib/llvm/lib/Target/Blackfin/BlackfinISelLowering.cpp4
-rw-r--r--contrib/llvm/lib/Target/Blackfin/BlackfinISelLowering.h2
-rw-r--r--contrib/llvm/lib/Target/Blackfin/BlackfinInstrInfo.cpp2
-rw-r--r--contrib/llvm/lib/Target/Blackfin/BlackfinIntrinsicInfo.cpp8
-rw-r--r--contrib/llvm/lib/Target/Blackfin/BlackfinIntrinsicInfo.h4
-rw-r--r--contrib/llvm/lib/Target/Blackfin/BlackfinRegisterInfo.cpp18
-rw-r--r--contrib/llvm/lib/Target/Blackfin/BlackfinRegisterInfo.h4
-rw-r--r--contrib/llvm/lib/Target/Blackfin/BlackfinSubtarget.cpp2
-rw-r--r--contrib/llvm/lib/Target/Blackfin/BlackfinTargetMachine.cpp12
-rw-r--r--contrib/llvm/lib/Target/Blackfin/BlackfinTargetMachine.h5
-rw-r--r--contrib/llvm/lib/Target/Blackfin/MCTargetDesc/BlackfinMCTargetDesc.cpp39
-rw-r--r--contrib/llvm/lib/Target/Blackfin/MCTargetDesc/CMakeLists.txt4
-rw-r--r--contrib/llvm/lib/Target/Blackfin/MCTargetDesc/Makefile16
-rw-r--r--contrib/llvm/lib/Target/Blackfin/TargetInfo/BlackfinTargetInfo.cpp2
-rw-r--r--contrib/llvm/lib/Target/CBackend/CBackend.cpp180
-rw-r--r--contrib/llvm/lib/Target/CBackend/CTargetMachine.h5
-rw-r--r--contrib/llvm/lib/Target/CBackend/TargetInfo/CBackendTargetInfo.cpp4
-rw-r--r--contrib/llvm/lib/Target/CellSPU/MCTargetDesc/CMakeLists.txt4
-rw-r--r--contrib/llvm/lib/Target/CellSPU/MCTargetDesc/Makefile16
-rw-r--r--contrib/llvm/lib/Target/CellSPU/MCTargetDesc/SPUMCTargetDesc.cpp52
-rw-r--r--contrib/llvm/lib/Target/CellSPU/MCTargetDesc/SPUMCTargetDesc.h4
-rw-r--r--contrib/llvm/lib/Target/CellSPU/SPUAsmPrinter.cpp2
-rw-r--r--contrib/llvm/lib/Target/CellSPU/SPUFrameLowering.cpp20
-rw-r--r--contrib/llvm/lib/Target/CellSPU/SPUFrameLowering.h14
-rw-r--r--contrib/llvm/lib/Target/CellSPU/SPUISelLowering.cpp16
-rw-r--r--contrib/llvm/lib/Target/CellSPU/SPUISelLowering.h6
-rw-r--r--contrib/llvm/lib/Target/CellSPU/SPUInstrInfo.cpp4
-rw-r--r--contrib/llvm/lib/Target/CellSPU/SPUInstrInfo.td10
-rw-r--r--contrib/llvm/lib/Target/CellSPU/SPURegisterInfo.cpp19
-rw-r--r--contrib/llvm/lib/Target/CellSPU/SPURegisterInfo.h6
-rw-r--r--contrib/llvm/lib/Target/CellSPU/SPUSubtarget.cpp2
-rw-r--r--contrib/llvm/lib/Target/CellSPU/SPUTargetMachine.cpp25
-rw-r--r--contrib/llvm/lib/Target/CellSPU/SPUTargetMachine.h5
-rw-r--r--contrib/llvm/lib/Target/CellSPU/TargetInfo/CellSPUTargetInfo.cpp2
-rw-r--r--contrib/llvm/lib/Target/CppBackend/CPPBackend.cpp141
-rw-r--r--contrib/llvm/lib/Target/CppBackend/CPPTargetMachine.h5
-rw-r--r--contrib/llvm/lib/Target/CppBackend/TargetInfo/CppBackendTargetInfo.cpp4
-rw-r--r--contrib/llvm/lib/Target/MBlaze/AsmParser/CMakeLists.txt8
-rw-r--r--contrib/llvm/lib/Target/MBlaze/AsmParser/MBlazeAsmLexer.cpp27
-rw-r--r--contrib/llvm/lib/Target/MBlaze/AsmParser/MBlazeAsmParser.cpp24
-rw-r--r--contrib/llvm/lib/Target/MBlaze/AsmParser/Makefile15
-rw-r--r--contrib/llvm/lib/Target/MBlaze/Disassembler/CMakeLists.txt16
-rw-r--r--contrib/llvm/lib/Target/MBlaze/Disassembler/MBlazeDisassembler.cpp76
-rw-r--r--contrib/llvm/lib/Target/MBlaze/Disassembler/MBlazeDisassembler.h9
-rw-r--r--contrib/llvm/lib/Target/MBlaze/Disassembler/Makefile16
-rw-r--r--contrib/llvm/lib/Target/MBlaze/InstPrinter/CMakeLists.txt8
-rw-r--r--contrib/llvm/lib/Target/MBlaze/InstPrinter/MBlazeInstPrinter.cpp4
-rw-r--r--contrib/llvm/lib/Target/MBlaze/InstPrinter/MBlazeInstPrinter.h2
-rw-r--r--contrib/llvm/lib/Target/MBlaze/InstPrinter/Makefile16
-rw-r--r--contrib/llvm/lib/Target/MBlaze/MBlaze.h12
-rw-r--r--contrib/llvm/lib/Target/MBlaze/MBlazeAsmPrinter.cpp21
-rw-r--r--contrib/llvm/lib/Target/MBlaze/MBlazeFrameLowering.cpp2
-rw-r--r--contrib/llvm/lib/Target/MBlaze/MBlazeISelLowering.cpp13
-rw-r--r--contrib/llvm/lib/Target/MBlaze/MBlazeISelLowering.h2
-rw-r--r--contrib/llvm/lib/Target/MBlaze/MBlazeInstrInfo.cpp5
-rw-r--r--contrib/llvm/lib/Target/MBlaze/MBlazeInstrInfo.h56
-rw-r--r--contrib/llvm/lib/Target/MBlaze/MBlazeInstrInfo.td21
-rw-r--r--contrib/llvm/lib/Target/MBlaze/MBlazeIntrinsicInfo.cpp8
-rw-r--r--contrib/llvm/lib/Target/MBlaze/MBlazeIntrinsicInfo.h4
-rw-r--r--contrib/llvm/lib/Target/MBlaze/MBlazeRegisterInfo.cpp172
-rw-r--r--contrib/llvm/lib/Target/MBlaze/MBlazeRegisterInfo.h12
-rw-r--r--contrib/llvm/lib/Target/MBlaze/MBlazeSubtarget.cpp2
-rw-r--r--contrib/llvm/lib/Target/MBlaze/MBlazeTargetMachine.cpp50
-rw-r--r--contrib/llvm/lib/Target/MBlaze/MBlazeTargetMachine.h5
-rw-r--r--contrib/llvm/lib/Target/MBlaze/MBlazeTargetObjectFile.cpp2
-rw-r--r--contrib/llvm/lib/Target/MBlaze/MCTargetDesc/CMakeLists.txt4
-rw-r--r--contrib/llvm/lib/Target/MBlaze/MCTargetDesc/MBlazeAsmBackend.cpp (renamed from contrib/llvm/lib/Target/MBlaze/MBlazeAsmBackend.cpp)17
-rw-r--r--contrib/llvm/lib/Target/MBlaze/MCTargetDesc/MBlazeBaseInfo.h240
-rw-r--r--contrib/llvm/lib/Target/MBlaze/MCTargetDesc/MBlazeMCCodeEmitter.cpp (renamed from contrib/llvm/lib/Target/MBlaze/MBlazeMCCodeEmitter.cpp)8
-rw-r--r--contrib/llvm/lib/Target/MBlaze/MCTargetDesc/MBlazeMCTargetDesc.cpp93
-rw-r--r--contrib/llvm/lib/Target/MBlaze/MCTargetDesc/MBlazeMCTargetDesc.h11
-rw-r--r--contrib/llvm/lib/Target/MBlaze/MCTargetDesc/Makefile16
-rw-r--r--contrib/llvm/lib/Target/MBlaze/TargetInfo/MBlazeTargetInfo.cpp2
-rw-r--r--contrib/llvm/lib/Target/MSP430/InstPrinter/CMakeLists.txt6
-rw-r--r--contrib/llvm/lib/Target/MSP430/InstPrinter/MSP430InstPrinter.cpp4
-rw-r--r--contrib/llvm/lib/Target/MSP430/InstPrinter/MSP430InstPrinter.h4
-rw-r--r--contrib/llvm/lib/Target/MSP430/InstPrinter/Makefile15
-rw-r--r--contrib/llvm/lib/Target/MSP430/MCTargetDesc/CMakeLists.txt4
-rw-r--r--contrib/llvm/lib/Target/MSP430/MCTargetDesc/MSP430MCTargetDesc.cpp51
-rw-r--r--contrib/llvm/lib/Target/MSP430/MCTargetDesc/MSP430MCTargetDesc.h4
-rw-r--r--contrib/llvm/lib/Target/MSP430/MCTargetDesc/Makefile16
-rw-r--r--contrib/llvm/lib/Target/MSP430/MSP430AsmPrinter.cpp14
-rw-r--r--contrib/llvm/lib/Target/MSP430/MSP430ISelLowering.cpp7
-rw-r--r--contrib/llvm/lib/Target/MSP430/MSP430ISelLowering.h4
-rw-r--r--contrib/llvm/lib/Target/MSP430/MSP430InstrInfo.cpp2
-rw-r--r--contrib/llvm/lib/Target/MSP430/MSP430RegisterInfo.cpp16
-rw-r--r--contrib/llvm/lib/Target/MSP430/MSP430RegisterInfo.h5
-rw-r--r--contrib/llvm/lib/Target/MSP430/MSP430Subtarget.cpp2
-rw-r--r--contrib/llvm/lib/Target/MSP430/MSP430TargetMachine.cpp11
-rw-r--r--contrib/llvm/lib/Target/MSP430/MSP430TargetMachine.h5
-rw-r--r--contrib/llvm/lib/Target/MSP430/TargetInfo/MSP430TargetInfo.cpp2
-rw-r--r--contrib/llvm/lib/Target/Mangler.cpp4
-rw-r--r--contrib/llvm/lib/Target/Mips/InstPrinter/CMakeLists.txt6
-rw-r--r--contrib/llvm/lib/Target/Mips/InstPrinter/Makefile16
-rw-r--r--contrib/llvm/lib/Target/Mips/InstPrinter/MipsInstPrinter.cpp6
-rw-r--r--contrib/llvm/lib/Target/Mips/InstPrinter/MipsInstPrinter.h4
-rw-r--r--contrib/llvm/lib/Target/Mips/MCTargetDesc/CMakeLists.txt4
-rw-r--r--contrib/llvm/lib/Target/Mips/MCTargetDesc/Makefile16
-rw-r--r--contrib/llvm/lib/Target/Mips/MCTargetDesc/MipsAsmBackend.cpp117
-rw-r--r--contrib/llvm/lib/Target/Mips/MCTargetDesc/MipsBaseInfo.h113
-rw-r--r--contrib/llvm/lib/Target/Mips/MCTargetDesc/MipsFixupKinds.h90
-rw-r--r--contrib/llvm/lib/Target/Mips/MCTargetDesc/MipsMCAsmInfo.cpp3
-rw-r--r--contrib/llvm/lib/Target/Mips/MCTargetDesc/MipsMCCodeEmitter.cpp52
-rw-r--r--contrib/llvm/lib/Target/Mips/MCTargetDesc/MipsMCTargetDesc.cpp122
-rw-r--r--contrib/llvm/lib/Target/Mips/MCTargetDesc/MipsMCTargetDesc.h21
-rw-r--r--contrib/llvm/lib/Target/Mips/Mips.h3
-rw-r--r--contrib/llvm/lib/Target/Mips/Mips.td32
-rw-r--r--contrib/llvm/lib/Target/Mips/Mips64InstrInfo.td214
-rw-r--r--contrib/llvm/lib/Target/Mips/MipsAsmPrinter.cpp42
-rw-r--r--contrib/llvm/lib/Target/Mips/MipsCallingConv.td55
-rw-r--r--contrib/llvm/lib/Target/Mips/MipsCodeEmitter.cpp245
-rw-r--r--contrib/llvm/lib/Target/Mips/MipsDelaySlotFiller.cpp204
-rw-r--r--contrib/llvm/lib/Target/Mips/MipsFrameLowering.cpp19
-rw-r--r--contrib/llvm/lib/Target/Mips/MipsFrameLowering.h5
-rw-r--r--contrib/llvm/lib/Target/Mips/MipsISelDAGToDAG.cpp185
-rw-r--r--contrib/llvm/lib/Target/Mips/MipsISelLowering.cpp848
-rw-r--r--contrib/llvm/lib/Target/Mips/MipsISelLowering.h16
-rw-r--r--contrib/llvm/lib/Target/Mips/MipsInstrFPU.td236
-rw-r--r--contrib/llvm/lib/Target/Mips/MipsInstrFormats.td44
-rw-r--r--contrib/llvm/lib/Target/Mips/MipsInstrInfo.cpp210
-rw-r--r--contrib/llvm/lib/Target/Mips/MipsInstrInfo.h37
-rw-r--r--contrib/llvm/lib/Target/Mips/MipsInstrInfo.td832
-rw-r--r--contrib/llvm/lib/Target/Mips/MipsJITInfo.cpp230
-rw-r--r--contrib/llvm/lib/Target/Mips/MipsJITInfo.h70
-rw-r--r--contrib/llvm/lib/Target/Mips/MipsMCInstLower.cpp64
-rw-r--r--contrib/llvm/lib/Target/Mips/MipsMCInstLower.h5
-rw-r--r--contrib/llvm/lib/Target/Mips/MipsMCSymbolRefExpr.cpp9
-rw-r--r--contrib/llvm/lib/Target/Mips/MipsMCSymbolRefExpr.h7
-rw-r--r--contrib/llvm/lib/Target/Mips/MipsMachineFunction.h9
-rw-r--r--contrib/llvm/lib/Target/Mips/MipsRegisterInfo.cpp205
-rw-r--r--contrib/llvm/lib/Target/Mips/MipsRegisterInfo.h4
-rw-r--r--contrib/llvm/lib/Target/Mips/MipsRegisterInfo.td119
-rw-r--r--contrib/llvm/lib/Target/Mips/MipsRelocations.h41
-rw-r--r--contrib/llvm/lib/Target/Mips/MipsSubtarget.cpp38
-rw-r--r--contrib/llvm/lib/Target/Mips/MipsSubtarget.h15
-rw-r--r--contrib/llvm/lib/Target/Mips/MipsTargetMachine.cpp73
-rw-r--r--contrib/llvm/lib/Target/Mips/MipsTargetMachine.h48
-rw-r--r--contrib/llvm/lib/Target/Mips/MipsTargetObjectFile.cpp2
-rw-r--r--contrib/llvm/lib/Target/Mips/TargetInfo/MipsTargetInfo.cpp16
-rw-r--r--contrib/llvm/lib/Target/PTX/CMakeLists.txt26
-rw-r--r--contrib/llvm/lib/Target/PTX/InstPrinter/PTXInstPrinter.cpp192
-rw-r--r--contrib/llvm/lib/Target/PTX/InstPrinter/PTXInstPrinter.h47
-rw-r--r--contrib/llvm/lib/Target/PTX/MCTargetDesc/CMakeLists.txt4
-rw-r--r--contrib/llvm/lib/Target/PTX/MCTargetDesc/Makefile16
-rw-r--r--contrib/llvm/lib/Target/PTX/MCTargetDesc/PTXBaseInfo.h63
-rw-r--r--contrib/llvm/lib/Target/PTX/MCTargetDesc/PTXMCTargetDesc.cpp53
-rw-r--r--contrib/llvm/lib/Target/PTX/PTX.h28
-rw-r--r--contrib/llvm/lib/Target/PTX/PTX.td28
-rw-r--r--contrib/llvm/lib/Target/PTX/PTXAsmPrinter.cpp500
-rw-r--r--contrib/llvm/lib/Target/PTX/PTXAsmPrinter.h57
-rw-r--r--contrib/llvm/lib/Target/PTX/PTXCallingConv.td29
-rw-r--r--contrib/llvm/lib/Target/PTX/PTXFPRoundingModePass.cpp179
-rw-r--r--contrib/llvm/lib/Target/PTX/PTXISelDAGToDAG.cpp182
-rw-r--r--contrib/llvm/lib/Target/PTX/PTXISelLowering.cpp273
-rw-r--r--contrib/llvm/lib/Target/PTX/PTXISelLowering.h19
-rw-r--r--contrib/llvm/lib/Target/PTX/PTXInstrFormats.td31
-rw-r--r--contrib/llvm/lib/Target/PTX/PTXInstrInfo.cpp82
-rw-r--r--contrib/llvm/lib/Target/PTX/PTXInstrInfo.td1241
-rw-r--r--contrib/llvm/lib/Target/PTX/PTXInstrLoadStore.td278
-rw-r--r--contrib/llvm/lib/Target/PTX/PTXIntrinsicInstrInfo.td78
-rw-r--r--contrib/llvm/lib/Target/PTX/PTXMCAsmStreamer.cpp15
-rw-r--r--contrib/llvm/lib/Target/PTX/PTXMCInstLower.cpp32
-rw-r--r--contrib/llvm/lib/Target/PTX/PTXMFInfoExtract.cpp36
-rw-r--r--contrib/llvm/lib/Target/PTX/PTXMachineFunctionInfo.h163
-rw-r--r--contrib/llvm/lib/Target/PTX/PTXParamManager.cpp73
-rw-r--r--contrib/llvm/lib/Target/PTX/PTXParamManager.h86
-rw-r--r--contrib/llvm/lib/Target/PTX/PTXRegAlloc.cpp58
-rw-r--r--contrib/llvm/lib/Target/PTX/PTXRegisterInfo.cpp29
-rw-r--r--contrib/llvm/lib/Target/PTX/PTXRegisterInfo.h18
-rw-r--r--contrib/llvm/lib/Target/PTX/PTXRegisterInfo.td540
-rw-r--r--contrib/llvm/lib/Target/PTX/PTXSelectionDAGInfo.cpp149
-rw-r--r--contrib/llvm/lib/Target/PTX/PTXSelectionDAGInfo.h53
-rw-r--r--contrib/llvm/lib/Target/PTX/PTXSubtarget.cpp2
-rw-r--r--contrib/llvm/lib/Target/PTX/PTXSubtarget.h11
-rw-r--r--contrib/llvm/lib/Target/PTX/PTXTargetMachine.cpp318
-rw-r--r--contrib/llvm/lib/Target/PTX/PTXTargetMachine.h60
-rw-r--r--contrib/llvm/lib/Target/PTX/TargetInfo/CMakeLists.txt7
-rw-r--r--contrib/llvm/lib/Target/PTX/TargetInfo/Makefile15
-rw-r--r--contrib/llvm/lib/Target/PTX/TargetInfo/PTXTargetInfo.cpp2
-rwxr-xr-xcontrib/llvm/lib/Target/PTX/generate-register-td.py163
-rw-r--r--contrib/llvm/lib/Target/PowerPC/InstPrinter/CMakeLists.txt6
-rw-r--r--contrib/llvm/lib/Target/PowerPC/InstPrinter/Makefile16
-rw-r--r--contrib/llvm/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.cpp11
-rw-r--r--contrib/llvm/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.h2
-rw-r--r--contrib/llvm/lib/Target/PowerPC/MCTargetDesc/CMakeLists.txt4
-rw-r--r--contrib/llvm/lib/Target/PowerPC/MCTargetDesc/Makefile16
-rw-r--r--contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCAsmBackend.cpp (renamed from contrib/llvm/lib/Target/PowerPC/PPCAsmBackend.cpp)88
-rw-r--r--contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCBaseInfo.h70
-rw-r--r--contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCFixupKinds.h (renamed from contrib/llvm/lib/Target/PowerPC/PPCFixupKinds.h)0
-rw-r--r--contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCAsmInfo.cpp6
-rw-r--r--contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCCodeEmitter.cpp (renamed from contrib/llvm/lib/Target/PowerPC/PPCMCCodeEmitter.cpp)9
-rw-r--r--contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.cpp115
-rw-r--r--contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.h10
-rw-r--r--contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCPredicates.cpp (renamed from contrib/llvm/lib/Target/PowerPC/PPCPredicates.cpp)0
-rw-r--r--contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCPredicates.h (renamed from contrib/llvm/lib/Target/PowerPC/PPCPredicates.h)2
-rw-r--r--contrib/llvm/lib/Target/PowerPC/PPC.h11
-rw-r--r--contrib/llvm/lib/Target/PowerPC/PPC.td4
-rw-r--r--contrib/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp14
-rw-r--r--contrib/llvm/lib/Target/PowerPC/PPCBranchSelector.cpp2
-rw-r--r--contrib/llvm/lib/Target/PowerPC/PPCCodeEmitter.cpp4
-rw-r--r--contrib/llvm/lib/Target/PowerPC/PPCFrameLowering.cpp17
-rw-r--r--contrib/llvm/lib/Target/PowerPC/PPCFrameLowering.h1
-rw-r--r--contrib/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp2
-rw-r--r--contrib/llvm/lib/Target/PowerPC/PPCISelLowering.cpp65
-rw-r--r--contrib/llvm/lib/Target/PowerPC/PPCISelLowering.h17
-rw-r--r--contrib/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp36
-rw-r--r--contrib/llvm/lib/Target/PowerPC/PPCInstrInfo.td6
-rw-r--r--contrib/llvm/lib/Target/PowerPC/PPCRegisterInfo.cpp79
-rw-r--r--contrib/llvm/lib/Target/PowerPC/PPCRegisterInfo.h8
-rw-r--r--contrib/llvm/lib/Target/PowerPC/PPCSubtarget.cpp2
-rw-r--r--contrib/llvm/lib/Target/PowerPC/PPCTargetMachine.cpp71
-rw-r--r--contrib/llvm/lib/Target/PowerPC/PPCTargetMachine.h16
-rw-r--r--contrib/llvm/lib/Target/PowerPC/TargetInfo/PowerPCTargetInfo.cpp2
-rw-r--r--contrib/llvm/lib/Target/Sparc/MCTargetDesc/CMakeLists.txt4
-rw-r--r--contrib/llvm/lib/Target/Sparc/MCTargetDesc/Makefile16
-rw-r--r--contrib/llvm/lib/Target/Sparc/MCTargetDesc/SparcMCTargetDesc.cpp36
-rw-r--r--contrib/llvm/lib/Target/Sparc/SparcAsmPrinter.cpp2
-rw-r--r--contrib/llvm/lib/Target/Sparc/SparcISelLowering.cpp8
-rw-r--r--contrib/llvm/lib/Target/Sparc/SparcInstrInfo.cpp2
-rw-r--r--contrib/llvm/lib/Target/Sparc/SparcRegisterInfo.cpp15
-rw-r--r--contrib/llvm/lib/Target/Sparc/SparcRegisterInfo.h4
-rw-r--r--contrib/llvm/lib/Target/Sparc/SparcSubtarget.cpp2
-rw-r--r--contrib/llvm/lib/Target/Sparc/SparcTargetMachine.cpp27
-rw-r--r--contrib/llvm/lib/Target/Sparc/SparcTargetMachine.h16
-rw-r--r--contrib/llvm/lib/Target/Sparc/TargetInfo/SparcTargetInfo.cpp2
-rw-r--r--contrib/llvm/lib/Target/SystemZ/MCTargetDesc/CMakeLists.txt7
-rw-r--r--contrib/llvm/lib/Target/SystemZ/MCTargetDesc/Makefile16
-rw-r--r--contrib/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCTargetDesc.cpp39
-rw-r--r--contrib/llvm/lib/Target/SystemZ/SystemZAsmPrinter.cpp4
-rw-r--r--contrib/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp1
-rw-r--r--contrib/llvm/lib/Target/SystemZ/SystemZInstrInfo.cpp2
-rw-r--r--contrib/llvm/lib/Target/SystemZ/SystemZInstrInfo.td2
-rw-r--r--contrib/llvm/lib/Target/SystemZ/SystemZRegisterInfo.cpp17
-rw-r--r--contrib/llvm/lib/Target/SystemZ/SystemZRegisterInfo.h4
-rw-r--r--contrib/llvm/lib/Target/SystemZ/SystemZSubtarget.cpp2
-rw-r--r--contrib/llvm/lib/Target/SystemZ/SystemZTargetMachine.cpp13
-rw-r--r--contrib/llvm/lib/Target/SystemZ/SystemZTargetMachine.h5
-rw-r--r--contrib/llvm/lib/Target/SystemZ/TargetInfo/SystemZTargetInfo.cpp2
-rw-r--r--contrib/llvm/lib/Target/Target.cpp10
-rw-r--r--contrib/llvm/lib/Target/TargetAsmInfo.cpp23
-rw-r--r--contrib/llvm/lib/Target/TargetAsmLexer.cpp14
-rw-r--r--contrib/llvm/lib/Target/TargetData.cpp53
-rw-r--r--contrib/llvm/lib/Target/TargetFrameLowering.cpp8
-rw-r--r--contrib/llvm/lib/Target/TargetLoweringObjectFile.cpp59
-rw-r--r--contrib/llvm/lib/Target/TargetMachine.cpp67
-rw-r--r--contrib/llvm/lib/Target/TargetRegisterInfo.cpp49
-rw-r--r--contrib/llvm/lib/Target/X86/AsmParser/X86AsmLexer.cpp20
-rw-r--r--contrib/llvm/lib/Target/X86/AsmParser/X86AsmParser.cpp84
-rw-r--r--contrib/llvm/lib/Target/X86/Disassembler/X86Disassembler.cpp77
-rw-r--r--contrib/llvm/lib/Target/X86/Disassembler/X86Disassembler.h26
-rw-r--r--contrib/llvm/lib/Target/X86/Disassembler/X86DisassemblerDecoder.c165
-rw-r--r--contrib/llvm/lib/Target/X86/Disassembler/X86DisassemblerDecoderCommon.h8
-rw-r--r--contrib/llvm/lib/Target/X86/InstPrinter/CMakeLists.txt8
-rw-r--r--contrib/llvm/lib/Target/X86/InstPrinter/Makefile15
-rw-r--r--contrib/llvm/lib/Target/X86/InstPrinter/X86ATTInstPrinter.cpp10
-rw-r--r--contrib/llvm/lib/Target/X86/InstPrinter/X86ATTInstPrinter.h2
-rw-r--r--contrib/llvm/lib/Target/X86/InstPrinter/X86InstComments.cpp31
-rw-r--r--contrib/llvm/lib/Target/X86/InstPrinter/X86IntelInstPrinter.cpp7
-rw-r--r--contrib/llvm/lib/Target/X86/InstPrinter/X86IntelInstPrinter.h2
-rw-r--r--contrib/llvm/lib/Target/X86/MCTargetDesc/CMakeLists.txt7
-rw-r--r--contrib/llvm/lib/Target/X86/MCTargetDesc/Makefile16
-rw-r--r--contrib/llvm/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp (renamed from contrib/llvm/lib/Target/X86/X86AsmBackend.cpp)29
-rw-r--r--contrib/llvm/lib/Target/X86/MCTargetDesc/X86BaseInfo.h548
-rw-r--r--contrib/llvm/lib/Target/X86/MCTargetDesc/X86FixupKinds.h (renamed from contrib/llvm/lib/Target/X86/X86FixupKinds.h)0
-rw-r--r--contrib/llvm/lib/Target/X86/MCTargetDesc/X86MCCodeEmitter.cpp (renamed from contrib/llvm/lib/Target/X86/X86MCCodeEmitter.cpp)198
-rw-r--r--contrib/llvm/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.cpp338
-rw-r--r--contrib/llvm/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.h45
-rw-r--r--contrib/llvm/lib/Target/X86/MCTargetDesc/X86MachObjectWriter.cpp (renamed from contrib/llvm/lib/Target/X86/X86MachObjectWriter.cpp)6
-rw-r--r--contrib/llvm/lib/Target/X86/SSEDomainFix.cpp506
-rw-r--r--contrib/llvm/lib/Target/X86/TargetInfo/X86TargetInfo.cpp2
-rw-r--r--contrib/llvm/lib/Target/X86/Utils/CMakeLists.txt6
-rw-r--r--contrib/llvm/lib/Target/X86/Utils/Makefile15
-rw-r--r--contrib/llvm/lib/Target/X86/Utils/X86ShuffleDecode.cpp75
-rw-r--r--contrib/llvm/lib/Target/X86/Utils/X86ShuffleDecode.h20
-rw-r--r--contrib/llvm/lib/Target/X86/X86.h30
-rw-r--r--contrib/llvm/lib/Target/X86/X86.td78
-rw-r--r--contrib/llvm/lib/Target/X86/X86AsmPrinter.cpp23
-rw-r--r--contrib/llvm/lib/Target/X86/X86CodeEmitter.cpp36
-rw-r--r--contrib/llvm/lib/Target/X86/X86ELFWriterInfo.cpp2
-rw-r--r--contrib/llvm/lib/Target/X86/X86FastISel.cpp93
-rw-r--r--contrib/llvm/lib/Target/X86/X86FloatingPoint.cpp34
-rw-r--r--contrib/llvm/lib/Target/X86/X86FrameLowering.cpp625
-rw-r--r--contrib/llvm/lib/Target/X86/X86FrameLowering.h7
-rw-r--r--contrib/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp16
-rw-r--r--contrib/llvm/lib/Target/X86/X86ISelLowering.cpp2940
-rw-r--r--contrib/llvm/lib/Target/X86/X86ISelLowering.h62
-rw-r--r--contrib/llvm/lib/Target/X86/X86InstrArithmetic.td96
-rw-r--r--contrib/llvm/lib/Target/X86/X86InstrCompiler.td102
-rw-r--r--contrib/llvm/lib/Target/X86/X86InstrExtension.td4
-rw-r--r--contrib/llvm/lib/Target/X86/X86InstrFormats.td8
-rw-r--r--contrib/llvm/lib/Target/X86/X86InstrFragmentsSIMD.td84
-rw-r--r--contrib/llvm/lib/Target/X86/X86InstrInfo.cpp1838
-rw-r--r--contrib/llvm/lib/Target/X86/X86InstrInfo.h553
-rw-r--r--contrib/llvm/lib/Target/X86/X86InstrInfo.td209
-rw-r--r--contrib/llvm/lib/Target/X86/X86InstrSSE.td4104
-rw-r--r--contrib/llvm/lib/Target/X86/X86InstrSystem.td114
-rw-r--r--contrib/llvm/lib/Target/X86/X86InstrVMX.td10
-rw-r--r--contrib/llvm/lib/Target/X86/X86MCInstLower.cpp23
-rw-r--r--contrib/llvm/lib/Target/X86/X86MachineFunctionInfo.h20
-rw-r--r--contrib/llvm/lib/Target/X86/X86RegisterInfo.cpp165
-rw-r--r--contrib/llvm/lib/Target/X86/X86RegisterInfo.h25
-rw-r--r--contrib/llvm/lib/Target/X86/X86RegisterInfo.td11
-rw-r--r--contrib/llvm/lib/Target/X86/X86SelectionDAGInfo.cpp2
-rw-r--r--contrib/llvm/lib/Target/X86/X86Subtarget.cpp78
-rw-r--r--contrib/llvm/lib/Target/X86/X86Subtarget.h36
-rw-r--r--contrib/llvm/lib/Target/X86/X86TargetMachine.cpp173
-rw-r--r--contrib/llvm/lib/Target/X86/X86TargetMachine.h22
-rw-r--r--contrib/llvm/lib/Target/X86/X86TargetObjectFile.cpp76
-rw-r--r--contrib/llvm/lib/Target/X86/X86TargetObjectFile.h22
-rw-r--r--contrib/llvm/lib/Target/X86/X86VZeroUpper.cpp105
-rw-r--r--contrib/llvm/lib/Target/XCore/MCTargetDesc/CMakeLists.txt7
-rw-r--r--contrib/llvm/lib/Target/XCore/MCTargetDesc/Makefile16
-rw-r--r--contrib/llvm/lib/Target/XCore/MCTargetDesc/XCoreMCTargetDesc.cpp48
-rw-r--r--contrib/llvm/lib/Target/XCore/TargetInfo/XCoreTargetInfo.cpp2
-rw-r--r--contrib/llvm/lib/Target/XCore/XCoreAsmPrinter.cpp60
-rw-r--r--contrib/llvm/lib/Target/XCore/XCoreFrameLowering.cpp11
-rw-r--r--contrib/llvm/lib/Target/XCore/XCoreFrameLowering.h2
-rw-r--r--contrib/llvm/lib/Target/XCore/XCoreISelDAGToDAG.cpp11
-rw-r--r--contrib/llvm/lib/Target/XCore/XCoreISelLowering.cpp37
-rw-r--r--contrib/llvm/lib/Target/XCore/XCoreISelLowering.h5
-rw-r--r--contrib/llvm/lib/Target/XCore/XCoreInstrInfo.cpp12
-rw-r--r--contrib/llvm/lib/Target/XCore/XCoreInstrInfo.h5
-rw-r--r--contrib/llvm/lib/Target/XCore/XCoreInstrInfo.td81
-rw-r--r--contrib/llvm/lib/Target/XCore/XCoreRegisterInfo.cpp15
-rw-r--r--contrib/llvm/lib/Target/XCore/XCoreRegisterInfo.h5
-rw-r--r--contrib/llvm/lib/Target/XCore/XCoreSubtarget.cpp2
-rw-r--r--contrib/llvm/lib/Target/XCore/XCoreTargetMachine.cpp10
-rw-r--r--contrib/llvm/lib/Target/XCore/XCoreTargetMachine.h5
410 files changed, 31909 insertions, 22361 deletions
diff --git a/contrib/llvm/lib/Target/ARM/ARM.h b/contrib/llvm/lib/Target/ARM/ARM.h
index 08dc340..16d0da3 100644
--- a/contrib/llvm/lib/Target/ARM/ARM.h
+++ b/contrib/llvm/lib/Target/ARM/ARM.h
@@ -15,7 +15,7 @@
#ifndef TARGET_ARM_H
#define TARGET_ARM_H
-#include "ARMBaseInfo.h"
+#include "MCTargetDesc/ARMBaseInfo.h"
#include "MCTargetDesc/ARMMCTargetDesc.h"
#include "llvm/Support/DataTypes.h"
#include "llvm/Support/ErrorHandling.h"
@@ -29,19 +29,7 @@ class ARMBaseTargetMachine;
class FunctionPass;
class JITCodeEmitter;
class MachineInstr;
-class MCCodeEmitter;
class MCInst;
-class MCInstrInfo;
-class MCObjectWriter;
-class MCSubtargetInfo;
-class TargetAsmBackend;
-class formatted_raw_ostream;
-
-MCCodeEmitter *createARMMCCodeEmitter(const MCInstrInfo &MCII,
- const MCSubtargetInfo &STI,
- MCContext &Ctx);
-
-TargetAsmBackend *createARMAsmBackend(const Target &, const std::string &);
FunctionPass *createARMISelDag(ARMBaseTargetMachine &TM,
CodeGenOpt::Level OptLevel);
@@ -53,7 +41,6 @@ FunctionPass *createARMLoadStoreOptimizationPass(bool PreAlloc = false);
FunctionPass *createARMExpandPseudoPass();
FunctionPass *createARMGlobalMergePass(const TargetLowering* tli);
FunctionPass *createARMConstantIslandPass();
-FunctionPass *createNEONMoveFixPass();
FunctionPass *createMLxExpansionPass();
FunctionPass *createThumb2ITBlockPass();
FunctionPass *createThumb2SizeReductionPass();
@@ -61,12 +48,6 @@ FunctionPass *createThumb2SizeReductionPass();
void LowerARMMachineInstrToMCInst(const MachineInstr *MI, MCInst &OutMI,
ARMAsmPrinter &AP);
-/// createARMMachObjectWriter - Construct an ARM Mach-O object writer.
-MCObjectWriter *createARMMachObjectWriter(raw_ostream &OS,
- bool Is64Bit,
- uint32_t CPUType,
- uint32_t CPUSubtype);
-
} // end namespace llvm;
#endif
diff --git a/contrib/llvm/lib/Target/ARM/ARM.td b/contrib/llvm/lib/Target/ARM/ARM.td
index cf333cc..5c727ad 100644
--- a/contrib/llvm/lib/Target/ARM/ARM.td
+++ b/contrib/llvm/lib/Target/ARM/ARM.td
@@ -23,6 +23,9 @@ include "llvm/Target/Target.td"
def ModeThumb : SubtargetFeature<"thumb-mode", "InThumbMode", "true",
"Thumb mode">;
+def ModeNaCl : SubtargetFeature<"nacl-mode", "InNaClMode", "true",
+ "Native client mode">;
+
//===----------------------------------------------------------------------===//
// ARM Subtarget features.
//
@@ -85,12 +88,16 @@ def FeatureAvoidPartialCPSR : SubtargetFeature<"avoid-partial-cpsr",
/// Some M architectures don't have the DSP extension (v7E-M vs. v7M)
def FeatureDSPThumb2 : SubtargetFeature<"t2dsp", "Thumb2DSP", "true",
- "Supports v7 DSP instructions in Thumb2.">;
+ "Supports v7 DSP instructions in Thumb2">;
// Multiprocessing extension.
def FeatureMP : SubtargetFeature<"mp", "HasMPExtension", "true",
"Supports Multiprocessing extension">;
+// M-series ISA?
+def FeatureMClass : SubtargetFeature<"mclass", "IsMClass", "true",
+ "Is microcontroller profile ('M' series)">;
+
// ARM ISAs.
def HasV4TOps : SubtargetFeature<"v4t", "HasV4TOps", "true",
"Support ARM v4T instructions">;
@@ -105,7 +112,7 @@ def HasV6Ops : SubtargetFeature<"v6", "HasV6Ops", "true",
[HasV5TEOps]>;
def HasV6T2Ops : SubtargetFeature<"v6t2", "HasV6T2Ops", "true",
"Support ARM v6t2 instructions",
- [HasV6Ops, FeatureThumb2, FeatureDSPThumb2]>;
+ [HasV6Ops, FeatureThumb2]>;
def HasV7Ops : SubtargetFeature<"v7", "HasV7Ops", "true",
"Support ARM v7 instructions",
[HasV6T2Ops]>;
@@ -182,12 +189,14 @@ def : Processor<"mpcore", ARMV6Itineraries, [HasV6Ops, FeatureVFP2,
// V6M Processors.
def : Processor<"cortex-m0", ARMV6Itineraries, [HasV6Ops, FeatureNoARM,
- FeatureDB]>;
+ FeatureDB, FeatureMClass]>;
// V6T2 Processors.
-def : Processor<"arm1156t2-s", ARMV6Itineraries, [HasV6T2Ops]>;
+def : Processor<"arm1156t2-s", ARMV6Itineraries, [HasV6T2Ops,
+ FeatureDSPThumb2]>;
def : Processor<"arm1156t2f-s", ARMV6Itineraries, [HasV6T2Ops, FeatureVFP2,
- FeatureHasSlowFPVMLx]>;
+ FeatureHasSlowFPVMLx,
+ FeatureDSPThumb2]>;
// V7a Processors.
def : Processor<"cortex-a8", CortexA8Itineraries,
@@ -203,14 +212,14 @@ def : Processor<"cortex-a9-mp", CortexA9Itineraries,
// V7M Processors.
def : ProcNoItin<"cortex-m3", [HasV7Ops,
FeatureThumb2, FeatureNoARM, FeatureDB,
- FeatureHWDiv]>;
+ FeatureHWDiv, FeatureMClass]>;
// V7EM Processors.
def : ProcNoItin<"cortex-m4", [HasV7Ops,
FeatureThumb2, FeatureNoARM, FeatureDB,
FeatureHWDiv, FeatureDSPThumb2,
FeatureT2XtPk, FeatureVFP2,
- FeatureVFPOnlySP]>;
+ FeatureVFPOnlySP, FeatureMClass]>;
//===----------------------------------------------------------------------===//
// Register File Description
diff --git a/contrib/llvm/lib/Target/ARM/ARMAsmPrinter.cpp b/contrib/llvm/lib/Target/ARM/ARMAsmPrinter.cpp
index dbc3ee4..ea3319f 100644
--- a/contrib/llvm/lib/Target/ARM/ARMAsmPrinter.cpp
+++ b/contrib/llvm/lib/Target/ARM/ARMAsmPrinter.cpp
@@ -15,15 +15,15 @@
#define DEBUG_TYPE "asm-printer"
#include "ARM.h"
#include "ARMAsmPrinter.h"
-#include "ARMAddressingModes.h"
#include "ARMBuildAttrs.h"
#include "ARMBaseRegisterInfo.h"
#include "ARMConstantPoolValue.h"
#include "ARMMachineFunctionInfo.h"
-#include "ARMMCExpr.h"
#include "ARMTargetMachine.h"
#include "ARMTargetObjectFile.h"
#include "InstPrinter/ARMInstPrinter.h"
+#include "MCTargetDesc/ARMAddressingModes.h"
+#include "MCTargetDesc/ARMMCExpr.h"
#include "llvm/Analysis/DebugInfo.h"
#include "llvm/Constants.h"
#include "llvm/Module.h"
@@ -45,13 +45,13 @@
#include "llvm/Target/TargetData.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetOptions.h"
-#include "llvm/Target/TargetRegistry.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallString.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/TargetRegistry.h"
#include "llvm/Support/raw_ostream.h"
#include <cctype>
using namespace llvm;
@@ -92,7 +92,7 @@ namespace {
case ARMBuildAttrs::Advanced_SIMD_arch:
case ARMBuildAttrs::VFP_arch:
Streamer.EmitRawText(StringRef("\t.fpu ") + LowercaseString(String));
- break;
+ break;
default: assert(0 && "Unsupported Text attribute in ASM Mode"); break;
}
}
@@ -100,13 +100,41 @@ namespace {
};
class ObjectAttributeEmitter : public AttributeEmitter {
+ // This structure holds all attributes, accounting for
+ // their string/numeric value, so we can later emmit them
+ // in declaration order, keeping all in the same vector
+ struct AttributeItemType {
+ enum {
+ HiddenAttribute = 0,
+ NumericAttribute,
+ TextAttribute
+ } Type;
+ unsigned Tag;
+ unsigned IntValue;
+ StringRef StringValue;
+ } AttributeItem;
+
MCObjectStreamer &Streamer;
StringRef CurrentVendor;
- SmallString<64> Contents;
+ SmallVector<AttributeItemType, 64> Contents;
+
+ // Account for the ULEB/String size of each item,
+ // not just the number of items
+ size_t ContentsSize;
+ // FIXME: this should be in a more generic place, but
+ // getULEBSize() is in MCAsmInfo and will be moved to MCDwarf
+ size_t getULEBSize(int Value) {
+ size_t Size = 0;
+ do {
+ Value >>= 7;
+ Size += sizeof(int8_t); // Is this really necessary?
+ } while (Value);
+ return Size;
+ }
public:
ObjectAttributeEmitter(MCObjectStreamer &Streamer_) :
- Streamer(Streamer_), CurrentVendor("") { }
+ Streamer(Streamer_), CurrentVendor(""), ContentsSize(0) { }
void MaybeSwitchVendor(StringRef Vendor) {
assert(!Vendor.empty() && "Vendor cannot be empty.");
@@ -124,20 +152,32 @@ namespace {
}
void EmitAttribute(unsigned Attribute, unsigned Value) {
- // FIXME: should be ULEB
- Contents += Attribute;
- Contents += Value;
+ AttributeItemType attr = {
+ AttributeItemType::NumericAttribute,
+ Attribute,
+ Value,
+ StringRef("")
+ };
+ ContentsSize += getULEBSize(Attribute);
+ ContentsSize += getULEBSize(Value);
+ Contents.push_back(attr);
}
void EmitTextAttribute(unsigned Attribute, StringRef String) {
- Contents += Attribute;
- Contents += UppercaseString(String);
- Contents += 0;
+ AttributeItemType attr = {
+ AttributeItemType::TextAttribute,
+ Attribute,
+ 0,
+ String
+ };
+ ContentsSize += getULEBSize(Attribute);
+ // String + \0
+ ContentsSize += String.size()+1;
+
+ Contents.push_back(attr);
}
void Finish() {
- const size_t ContentsSize = Contents.size();
-
// Vendor size + Vendor name + '\0'
const size_t VendorHeaderSize = 4 + CurrentVendor.size() + 1;
@@ -151,7 +191,23 @@ namespace {
Streamer.EmitIntValue(ARMBuildAttrs::File, 1);
Streamer.EmitIntValue(TagHeaderSize + ContentsSize, 4);
- Streamer.EmitBytes(Contents, 0);
+ // Size should have been accounted for already, now
+ // emit each field as its type (ULEB or String)
+ for (unsigned int i=0; i<Contents.size(); ++i) {
+ AttributeItemType item = Contents[i];
+ Streamer.EmitULEB128IntValue(item.Tag, 0);
+ switch (item.Type) {
+ case AttributeItemType::NumericAttribute:
+ Streamer.EmitULEB128IntValue(item.IntValue, 0);
+ break;
+ case AttributeItemType::TextAttribute:
+ Streamer.EmitBytes(UppercaseString(item.StringValue), 0);
+ Streamer.EmitIntValue(0, 1); // '\0'
+ break;
+ default:
+ assert(0 && "Invalid attribute type");
+ }
+ }
Contents.clear();
}
@@ -184,7 +240,7 @@ void ARMAsmPrinter::EmitDwarfRegOp(const MachineLocation &MLoc) const {
// S registers are described as bit-pieces of a register
// S[2x] = DW_OP_regx(256 + (x>>1)) DW_OP_bit_piece(32, 0)
// S[2x+1] = DW_OP_regx(256 + (x>>1)) DW_OP_bit_piece(32, 32)
-
+
unsigned SReg = Reg - ARM::S0;
bool odd = SReg & 0x1;
unsigned Rx = 256 + (SReg >> 1);
@@ -209,12 +265,13 @@ void ARMAsmPrinter::EmitDwarfRegOp(const MachineLocation &MLoc) const {
} else if (Reg >= ARM::Q0 && Reg <= ARM::Q15) {
assert(ARM::Q0 + 15 == ARM::Q15 && "Unexpected ARM Q register numbering");
// Q registers Q0-Q15 are described by composing two D registers together.
- // Qx = DW_OP_regx(256+2x) DW_OP_piece(8) DW_OP_regx(256+2x+1) DW_OP_piece(8)
+ // Qx = DW_OP_regx(256+2x) DW_OP_piece(8) DW_OP_regx(256+2x+1)
+ // DW_OP_piece(8)
unsigned QReg = Reg - ARM::Q0;
unsigned D1 = 256 + 2 * QReg;
unsigned D2 = D1 + 1;
-
+
OutStreamer.AddComment("DW_OP_regx for Q register: D1");
EmitInt8(dwarf::DW_OP_regx);
EmitULEB128(D1);
@@ -233,6 +290,8 @@ void ARMAsmPrinter::EmitDwarfRegOp(const MachineLocation &MLoc) const {
}
void ARMAsmPrinter::EmitFunctionEntryLabel() {
+ OutStreamer.ForceCodeRegion();
+
if (AFI->isThumbFunction()) {
OutStreamer.EmitAssemblerFlag(MCAF_Code16);
OutStreamer.EmitThumbFunc(CurrentFnSym);
@@ -395,16 +454,16 @@ bool ARMAsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNum,
// This takes advantage of the 2 operand-ness of ldm/stm and that we've
// already got the operands in registers that are operands to the
// inline asm statement.
-
+
O << "{" << ARMInstPrinter::getRegisterName(RegBegin);
-
+
// FIXME: The register allocator not only may not have given us the
// registers in sequence, but may not be in ascending registers. This
// will require changes in the register allocator that'll need to be
// propagated down here if the operands change.
unsigned RegOps = OpNum + 1;
while (MI->getOperand(RegOps).isReg()) {
- O << ", "
+ O << ", "
<< ARMInstPrinter::getRegisterName(MI->getOperand(RegOps).getReg());
RegOps++;
}
@@ -413,14 +472,34 @@ bool ARMAsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNum,
return false;
}
+ case 'R': // The most significant register of a pair.
+ case 'Q': { // The least significant register of a pair.
+ if (OpNum == 0)
+ return true;
+ const MachineOperand &FlagsOP = MI->getOperand(OpNum - 1);
+ if (!FlagsOP.isImm())
+ return true;
+ unsigned Flags = FlagsOP.getImm();
+ unsigned NumVals = InlineAsm::getNumOperandRegisters(Flags);
+ if (NumVals != 2)
+ return true;
+ unsigned RegOp = ExtraCode[0] == 'Q' ? OpNum : OpNum + 1;
+ if (RegOp >= MI->getNumOperands())
+ return true;
+ const MachineOperand &MO = MI->getOperand(RegOp);
+ if (!MO.isReg())
+ return true;
+ unsigned Reg = MO.getReg();
+ O << ARMInstPrinter::getRegisterName(Reg);
+ return false;
+ }
+
// These modifiers are not yet supported.
case 'p': // The high single-precision register of a VFP double-precision
// register.
case 'e': // The low doubleword register of a NEON quad register.
case 'f': // The high doubleword register of a NEON quad register.
case 'h': // A range of VFP/NEON registers suitable for VLD1/VST1.
- case 'Q': // The least significant register of a pair.
- case 'R': // The most significant register of a pair.
case 'H': // The highest-numbered register of a pair.
return true;
}
@@ -437,7 +516,7 @@ bool ARMAsmPrinter::PrintAsmMemoryOperand(const MachineInstr *MI,
// Does this asm operand have a single letter operand modifier?
if (ExtraCode && ExtraCode[0]) {
if (ExtraCode[1] != 0) return true; // Unknown modifier.
-
+
switch (ExtraCode[0]) {
case 'A': // A memory operand for a VLD1/VST1 instruction.
default: return true; // Unknown modifier.
@@ -448,7 +527,7 @@ bool ARMAsmPrinter::PrintAsmMemoryOperand(const MachineInstr *MI,
return false;
}
}
-
+
const MachineOperand &MO = MI->getOperand(OpNum);
assert(MO.isReg() && "unexpected inline asm memory operand");
O << "[" << ARMInstPrinter::getRegisterName(MO.getReg()) << "]";
@@ -772,13 +851,19 @@ EmitMachineConstantPoolValue(MachineConstantPoolValue *MCPV) {
OS << MAI->getPrivateGlobalPrefix() << "_LSDA_" << getFunctionNumber();
MCSym = OutContext.GetOrCreateSymbol(OS.str());
} else if (ACPV->isBlockAddress()) {
- MCSym = GetBlockAddressSymbol(ACPV->getBlockAddress());
+ const BlockAddress *BA =
+ cast<ARMConstantPoolConstant>(ACPV)->getBlockAddress();
+ MCSym = GetBlockAddressSymbol(BA);
} else if (ACPV->isGlobalValue()) {
- const GlobalValue *GV = ACPV->getGV();
+ const GlobalValue *GV = cast<ARMConstantPoolConstant>(ACPV)->getGV();
MCSym = GetARMGVSymbol(GV);
+ } else if (ACPV->isMachineBasicBlock()) {
+ const MachineBasicBlock *MBB = cast<ARMConstantPoolMBB>(ACPV)->getMBB();
+ MCSym = MBB->getSymbol();
} else {
assert(ACPV->isExtSymbol() && "unrecognized constant pool value");
- MCSym = GetExternalSymbolSymbol(ACPV->getSymbol());
+ const char *Sym = cast<ARMConstantPoolSymbol>(ACPV)->getSymbol();
+ MCSym = GetExternalSymbolSymbol(Sym);
}
// Create an MCSymbol for the reference.
@@ -822,6 +907,9 @@ void ARMAsmPrinter::EmitJumpTable(const MachineInstr *MI) {
const MachineOperand &MO2 = MI->getOperand(OpNum+1); // Unique Id
unsigned JTI = MO1.getIndex();
+ // Tag the jump table appropriately for precise disassembly.
+ OutStreamer.EmitJumpTable32Region();
+
// Emit a label for the jump table.
MCSymbol *JTISymbol = GetARMJTIPICJumpTableLabel2(JTI, MO2.getImm());
OutStreamer.EmitLabel(JTISymbol);
@@ -847,6 +935,11 @@ void ARMAsmPrinter::EmitJumpTable(const MachineInstr *MI) {
Expr = MCBinaryExpr::CreateSub(Expr, MCSymbolRefExpr::Create(JTISymbol,
OutContext),
OutContext);
+ // If we're generating a table of Thumb addresses in static relocation
+ // model, we need to add one to keep interworking correctly.
+ else if (AFI->isThumbFunction())
+ Expr = MCBinaryExpr::CreateAdd(Expr, MCConstantExpr::Create(1,OutContext),
+ OutContext);
OutStreamer.EmitValue(Expr, 4);
}
}
@@ -859,6 +952,14 @@ void ARMAsmPrinter::EmitJump2Table(const MachineInstr *MI) {
unsigned JTI = MO1.getIndex();
// Emit a label for the jump table.
+ if (MI->getOpcode() == ARM::t2TBB_JT) {
+ OutStreamer.EmitJumpTable8Region();
+ } else if (MI->getOpcode() == ARM::t2TBH_JT) {
+ OutStreamer.EmitJumpTable16Region();
+ } else {
+ OutStreamer.EmitJumpTable32Region();
+ }
+
MCSymbol *JTISymbol = GetARMJTIPICJumpTableLabel2(JTI, MO2.getImm());
OutStreamer.EmitLabel(JTISymbol);
@@ -881,6 +982,8 @@ void ARMAsmPrinter::EmitJump2Table(const MachineInstr *MI) {
MCInst BrInst;
BrInst.setOpcode(ARM::t2B);
BrInst.addOperand(MCOperand::CreateExpr(MBBSymbolExpr));
+ BrInst.addOperand(MCOperand::CreateImm(ARMCC::AL));
+ BrInst.addOperand(MCOperand::CreateReg(0));
OutStreamer.EmitInstruction(BrInst);
continue;
}
@@ -994,7 +1097,8 @@ void ARMAsmPrinter::EmitUnwindingInstruction(const MachineInstr *MI) {
i != NumOps; ++i)
RegList.push_back(MI->getOperand(i).getReg());
break;
- case ARM::STR_PRE:
+ case ARM::STR_PRE_IMM:
+ case ARM::STR_PRE_REG:
assert(MI->getOperand(2).getReg() == ARM::SP &&
"Only stack pointer as a source reg is supported");
RegList.push_back(SrcReg);
@@ -1074,10 +1178,20 @@ extern cl::opt<bool> EnableARMEHABI;
#include "ARMGenMCPseudoLowering.inc"
void ARMAsmPrinter::EmitInstruction(const MachineInstr *MI) {
+ if (MI->getOpcode() != ARM::CONSTPOOL_ENTRY)
+ OutStreamer.EmitCodeRegion();
+
+ // Emit unwinding stuff for frame-related instructions
+ if (EnableARMEHABI && MI->getFlag(MachineInstr::FrameSetup))
+ EmitUnwindingInstruction(MI);
+
// Do any auto-generated pseudo lowerings.
if (emitPseudoExpansionLowering(OutStreamer, MI))
return;
+ assert(!convertAddSubFlagsOpcode(MI->getOpcode()) &&
+ "Pseudo flag setting opcode should be expanded early");
+
// Check for manual lowerings.
unsigned Opc = MI->getOpcode();
switch (Opc) {
@@ -1372,6 +1486,10 @@ void ARMAsmPrinter::EmitInstruction(const MachineInstr *MI) {
unsigned CPIdx = (unsigned)MI->getOperand(1).getIndex();
EmitAlignment(2);
+
+ // Mark the constant pool entry as data if we're not already in a data
+ // region.
+ OutStreamer.EmitDataRegion();
OutStreamer.EmitLabel(GetCPISymbol(LabelId));
const MachineConstantPoolEntry &MCPE = MCP->getConstants()[CPIdx];
@@ -1379,7 +1497,6 @@ void ARMAsmPrinter::EmitInstruction(const MachineInstr *MI) {
EmitMachineConstantPoolValue(MCPE.Val.MachineCPVal);
else
EmitGlobalConstant(MCPE.Val.ConstVal);
-
return;
}
case ARM::t2BR_JT: {
@@ -1590,6 +1707,8 @@ void ARMAsmPrinter::EmitInstruction(const MachineInstr *MI) {
MCInst TmpInst;
TmpInst.setOpcode(ARM::tB);
TmpInst.addOperand(MCOperand::CreateExpr(SymbolExpr));
+ TmpInst.addOperand(MCOperand::CreateImm(ARMCC::AL));
+ TmpInst.addOperand(MCOperand::CreateReg(0));
OutStreamer.EmitInstruction(TmpInst);
}
{
@@ -1804,10 +1923,6 @@ void ARMAsmPrinter::EmitInstruction(const MachineInstr *MI) {
MCInst TmpInst;
LowerARMMachineInstrToMCInst(MI, TmpInst, *this);
- // Emit unwinding stuff for frame-related instructions
- if (EnableARMEHABI && MI->getFlag(MachineInstr::FrameSetup))
- EmitUnwindingInstruction(MI);
-
OutStreamer.EmitInstruction(TmpInst);
}
@@ -1815,20 +1930,9 @@ void ARMAsmPrinter::EmitInstruction(const MachineInstr *MI) {
// Target Registry Stuff
//===----------------------------------------------------------------------===//
-static MCInstPrinter *createARMMCInstPrinter(const Target &T,
- unsigned SyntaxVariant,
- const MCAsmInfo &MAI) {
- if (SyntaxVariant == 0)
- return new ARMInstPrinter(MAI);
- return 0;
-}
-
// Force static initialization.
extern "C" void LLVMInitializeARMAsmPrinter() {
RegisterAsmPrinter<ARMAsmPrinter> X(TheARMTarget);
RegisterAsmPrinter<ARMAsmPrinter> Y(TheThumbTarget);
-
- TargetRegistry::RegisterMCInstPrinter(TheARMTarget, createARMMCInstPrinter);
- TargetRegistry::RegisterMCInstPrinter(TheThumbTarget, createARMMCInstPrinter);
}
diff --git a/contrib/llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp b/contrib/llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp
index 649bd7d..408edfc 100644
--- a/contrib/llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp
+++ b/contrib/llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp
@@ -13,11 +13,11 @@
#include "ARMBaseInstrInfo.h"
#include "ARM.h"
-#include "ARMAddressingModes.h"
#include "ARMConstantPoolValue.h"
#include "ARMHazardRecognizer.h"
#include "ARMMachineFunctionInfo.h"
#include "ARMRegisterInfo.h"
+#include "MCTargetDesc/ARMAddressingModes.h"
#include "llvm/Constants.h"
#include "llvm/Function.h"
#include "llvm/GlobalValue.h"
@@ -29,6 +29,7 @@
#include "llvm/CodeGen/MachineMemOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/PseudoSourceValue.h"
+#include "llvm/CodeGen/SelectionDAGNodes.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/Support/BranchProbability.h"
#include "llvm/Support/CommandLine.h"
@@ -45,6 +46,10 @@ static cl::opt<bool>
EnableARM3Addr("enable-arm-3-addr-conv", cl::Hidden,
cl::desc("Enable ARM 2-addr to 3-addr conv"));
+static cl::opt<bool>
+WidenVMOVS("widen-vmovs", cl::Hidden,
+ cl::desc("Widen ARM vmovs to vmovd when possible"));
+
/// ARM_MLxEntry - Record information about MLA / MLS instructions.
struct ARM_MLxEntry {
unsigned MLxOpc; // MLA / MLS opcode
@@ -171,7 +176,7 @@ ARMBaseInstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI,
ARM_AM::ShiftOpc ShOpc = ARM_AM::getAM2ShiftOpc(OffImm);
unsigned SOOpc = ARM_AM::getSORegOpc(ShOpc, Amt);
UpdateMI = BuildMI(MF, MI->getDebugLoc(),
- get(isSub ? ARM::SUBrs : ARM::ADDrs), WBReg)
+ get(isSub ? ARM::SUBrsi : ARM::ADDrsi), WBReg)
.addReg(BaseReg).addReg(OffReg).addReg(0).addImm(SOOpc)
.addImm(Pred).addReg(0).addReg(0);
} else
@@ -399,6 +404,7 @@ ARMBaseInstrInfo::InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB,
? ARM::B : (AFI->isThumb2Function() ? ARM::t2B : ARM::tB);
int BccOpc = !AFI->isThumbFunction()
? ARM::Bcc : (AFI->isThumb2Function() ? ARM::t2Bcc : ARM::tBcc);
+ bool isThumb = AFI->isThumbFunction() || AFI->isThumb2Function();
// Shouldn't be a fall through.
assert(TBB && "InsertBranch must not be told to insert a fallthrough");
@@ -406,9 +412,12 @@ ARMBaseInstrInfo::InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB,
"ARM branch conditions have two components!");
if (FBB == 0) {
- if (Cond.empty()) // Unconditional branch?
- BuildMI(&MBB, DL, get(BOpc)).addMBB(TBB);
- else
+ if (Cond.empty()) { // Unconditional branch?
+ if (isThumb)
+ BuildMI(&MBB, DL, get(BOpc)).addMBB(TBB).addImm(ARMCC::AL).addReg(0);
+ else
+ BuildMI(&MBB, DL, get(BOpc)).addMBB(TBB);
+ } else
BuildMI(&MBB, DL, get(BccOpc)).addMBB(TBB)
.addImm(Cond[0].getImm()).addReg(Cond[1].getReg());
return 1;
@@ -417,7 +426,10 @@ ARMBaseInstrInfo::InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB,
// Two-way conditional branch.
BuildMI(&MBB, DL, get(BccOpc)).addMBB(TBB)
.addImm(Cond[0].getImm()).addReg(Cond[1].getReg());
- BuildMI(&MBB, DL, get(BOpc)).addMBB(FBB);
+ if (isThumb)
+ BuildMI(&MBB, DL, get(BOpc)).addMBB(FBB).addImm(ARMCC::AL).addReg(0);
+ else
+ BuildMI(&MBB, DL, get(BOpc)).addMBB(FBB);
return 2;
}
@@ -627,7 +639,7 @@ void ARMBaseInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
bool SPRDest = ARM::SPRRegClass.contains(DestReg);
bool SPRSrc = ARM::SPRRegClass.contains(SrcReg);
- unsigned Opc;
+ unsigned Opc = 0;
if (SPRDest && SPRSrc)
Opc = ARM::VMOVS;
else if (GPRDest && SPRSrc)
@@ -638,19 +650,40 @@ void ARMBaseInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
Opc = ARM::VMOVD;
else if (ARM::QPRRegClass.contains(DestReg, SrcReg))
Opc = ARM::VORRq;
- else if (ARM::QQPRRegClass.contains(DestReg, SrcReg))
- Opc = ARM::VMOVQQ;
- else if (ARM::QQQQPRRegClass.contains(DestReg, SrcReg))
- Opc = ARM::VMOVQQQQ;
- else
- llvm_unreachable("Impossible reg-to-reg copy");
- MachineInstrBuilder MIB = BuildMI(MBB, I, DL, get(Opc), DestReg);
- MIB.addReg(SrcReg, getKillRegState(KillSrc));
- if (Opc == ARM::VORRq)
+ if (Opc) {
+ MachineInstrBuilder MIB = BuildMI(MBB, I, DL, get(Opc), DestReg);
MIB.addReg(SrcReg, getKillRegState(KillSrc));
- if (Opc != ARM::VMOVQQ && Opc != ARM::VMOVQQQQ)
+ if (Opc == ARM::VORRq)
+ MIB.addReg(SrcReg, getKillRegState(KillSrc));
AddDefaultPred(MIB);
+ return;
+ }
+
+ // Generate instructions for VMOVQQ and VMOVQQQQ pseudos in place.
+ if (ARM::QQPRRegClass.contains(DestReg, SrcReg) ||
+ ARM::QQQQPRRegClass.contains(DestReg, SrcReg)) {
+ const TargetRegisterInfo *TRI = &getRegisterInfo();
+ assert(ARM::qsub_0 + 3 == ARM::qsub_3 && "Expected contiguous enum.");
+ unsigned EndSubReg = ARM::QQPRRegClass.contains(DestReg, SrcReg) ?
+ ARM::qsub_1 : ARM::qsub_3;
+ for (unsigned i = ARM::qsub_0, e = EndSubReg + 1; i != e; ++i) {
+ unsigned Dst = TRI->getSubReg(DestReg, i);
+ unsigned Src = TRI->getSubReg(SrcReg, i);
+ MachineInstrBuilder Mov =
+ AddDefaultPred(BuildMI(MBB, I, I->getDebugLoc(), get(ARM::VORRq))
+ .addReg(Dst, RegState::Define)
+ .addReg(Src, getKillRegState(KillSrc))
+ .addReg(Src, getKillRegState(KillSrc)));
+ if (i == EndSubReg) {
+ Mov->addRegisterDefined(DestReg, TRI);
+ if (KillSrc)
+ Mov->addRegisterKilled(SrcReg, TRI);
+ }
+ }
+ return;
+ }
+ llvm_unreachable("Impossible reg-to-reg copy");
}
static const
@@ -683,82 +716,84 @@ storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
MFI.getObjectSize(FI),
Align);
- // tGPR is used sometimes in ARM instructions that need to avoid using
- // certain registers. Just treat it as GPR here. Likewise, rGPR.
- if (RC == ARM::tGPRRegisterClass || RC == ARM::tcGPRRegisterClass
- || RC == ARM::rGPRRegisterClass)
- RC = ARM::GPRRegisterClass;
-
- switch (RC->getID()) {
- case ARM::GPRRegClassID:
- AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::STRi12))
+ switch (RC->getSize()) {
+ case 4:
+ if (ARM::GPRRegClass.hasSubClassEq(RC)) {
+ AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::STRi12))
.addReg(SrcReg, getKillRegState(isKill))
.addFrameIndex(FI).addImm(0).addMemOperand(MMO));
- break;
- case ARM::SPRRegClassID:
- AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VSTRS))
+ } else if (ARM::SPRRegClass.hasSubClassEq(RC)) {
+ AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VSTRS))
.addReg(SrcReg, getKillRegState(isKill))
.addFrameIndex(FI).addImm(0).addMemOperand(MMO));
- break;
- case ARM::DPRRegClassID:
- case ARM::DPR_VFP2RegClassID:
- case ARM::DPR_8RegClassID:
- AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VSTRD))
+ } else
+ llvm_unreachable("Unknown reg class!");
+ break;
+ case 8:
+ if (ARM::DPRRegClass.hasSubClassEq(RC)) {
+ AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VSTRD))
.addReg(SrcReg, getKillRegState(isKill))
.addFrameIndex(FI).addImm(0).addMemOperand(MMO));
- break;
- case ARM::QPRRegClassID:
- case ARM::QPR_VFP2RegClassID:
- case ARM::QPR_8RegClassID:
- if (Align >= 16 && getRegisterInfo().needsStackRealignment(MF)) {
- AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VST1q64Pseudo))
+ } else
+ llvm_unreachable("Unknown reg class!");
+ break;
+ case 16:
+ if (ARM::QPRRegClass.hasSubClassEq(RC)) {
+ if (Align >= 16 && getRegisterInfo().needsStackRealignment(MF)) {
+ AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VST1q64Pseudo))
.addFrameIndex(FI).addImm(16)
.addReg(SrcReg, getKillRegState(isKill))
.addMemOperand(MMO));
- } else {
- AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VSTMQIA))
+ } else {
+ AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VSTMQIA))
.addReg(SrcReg, getKillRegState(isKill))
.addFrameIndex(FI)
.addMemOperand(MMO));
- }
- break;
- case ARM::QQPRRegClassID:
- case ARM::QQPR_VFP2RegClassID:
- if (Align >= 16 && getRegisterInfo().canRealignStack(MF)) {
- // FIXME: It's possible to only store part of the QQ register if the
- // spilled def has a sub-register index.
- AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VST1d64QPseudo))
+ }
+ } else
+ llvm_unreachable("Unknown reg class!");
+ break;
+ case 32:
+ if (ARM::QQPRRegClass.hasSubClassEq(RC)) {
+ if (Align >= 16 && getRegisterInfo().canRealignStack(MF)) {
+ // FIXME: It's possible to only store part of the QQ register if the
+ // spilled def has a sub-register index.
+ AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VST1d64QPseudo))
.addFrameIndex(FI).addImm(16)
.addReg(SrcReg, getKillRegState(isKill))
.addMemOperand(MMO));
- } else {
- MachineInstrBuilder MIB =
- AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VSTMDIA))
+ } else {
+ MachineInstrBuilder MIB =
+ AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VSTMDIA))
.addFrameIndex(FI))
- .addMemOperand(MMO);
- MIB = AddDReg(MIB, SrcReg, ARM::dsub_0, getKillRegState(isKill), TRI);
- MIB = AddDReg(MIB, SrcReg, ARM::dsub_1, 0, TRI);
- MIB = AddDReg(MIB, SrcReg, ARM::dsub_2, 0, TRI);
- AddDReg(MIB, SrcReg, ARM::dsub_3, 0, TRI);
- }
- break;
- case ARM::QQQQPRRegClassID: {
- MachineInstrBuilder MIB =
- AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VSTMDIA))
- .addFrameIndex(FI))
- .addMemOperand(MMO);
- MIB = AddDReg(MIB, SrcReg, ARM::dsub_0, getKillRegState(isKill), TRI);
- MIB = AddDReg(MIB, SrcReg, ARM::dsub_1, 0, TRI);
- MIB = AddDReg(MIB, SrcReg, ARM::dsub_2, 0, TRI);
- MIB = AddDReg(MIB, SrcReg, ARM::dsub_3, 0, TRI);
- MIB = AddDReg(MIB, SrcReg, ARM::dsub_4, 0, TRI);
- MIB = AddDReg(MIB, SrcReg, ARM::dsub_5, 0, TRI);
- MIB = AddDReg(MIB, SrcReg, ARM::dsub_6, 0, TRI);
- AddDReg(MIB, SrcReg, ARM::dsub_7, 0, TRI);
- break;
- }
- default:
- llvm_unreachable("Unknown regclass!");
+ .addMemOperand(MMO);
+ MIB = AddDReg(MIB, SrcReg, ARM::dsub_0, getKillRegState(isKill), TRI);
+ MIB = AddDReg(MIB, SrcReg, ARM::dsub_1, 0, TRI);
+ MIB = AddDReg(MIB, SrcReg, ARM::dsub_2, 0, TRI);
+ AddDReg(MIB, SrcReg, ARM::dsub_3, 0, TRI);
+ }
+ } else
+ llvm_unreachable("Unknown reg class!");
+ break;
+ case 64:
+ if (ARM::QQQQPRRegClass.hasSubClassEq(RC)) {
+ MachineInstrBuilder MIB =
+ AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VSTMDIA))
+ .addFrameIndex(FI))
+ .addMemOperand(MMO);
+ MIB = AddDReg(MIB, SrcReg, ARM::dsub_0, getKillRegState(isKill), TRI);
+ MIB = AddDReg(MIB, SrcReg, ARM::dsub_1, 0, TRI);
+ MIB = AddDReg(MIB, SrcReg, ARM::dsub_2, 0, TRI);
+ MIB = AddDReg(MIB, SrcReg, ARM::dsub_3, 0, TRI);
+ MIB = AddDReg(MIB, SrcReg, ARM::dsub_4, 0, TRI);
+ MIB = AddDReg(MIB, SrcReg, ARM::dsub_5, 0, TRI);
+ MIB = AddDReg(MIB, SrcReg, ARM::dsub_6, 0, TRI);
+ AddDReg(MIB, SrcReg, ARM::dsub_7, 0, TRI);
+ } else
+ llvm_unreachable("Unknown reg class!");
+ break;
+ default:
+ llvm_unreachable("Unknown reg class!");
}
}
@@ -809,6 +844,12 @@ ARMBaseInstrInfo::isStoreToStackSlot(const MachineInstr *MI,
return 0;
}
+unsigned ARMBaseInstrInfo::isStoreToStackSlotPostFE(const MachineInstr *MI,
+ int &FrameIndex) const {
+ const MachineMemOperand *Dummy;
+ return MI->getDesc().mayStore() && hasStoreToStackSlot(MI, Dummy, FrameIndex);
+}
+
void ARMBaseInstrInfo::
loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
unsigned DestReg, int FI,
@@ -826,72 +867,77 @@ loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
MFI.getObjectSize(FI),
Align);
- // tGPR is used sometimes in ARM instructions that need to avoid using
- // certain registers. Just treat it as GPR here.
- if (RC == ARM::tGPRRegisterClass || RC == ARM::tcGPRRegisterClass
- || RC == ARM::rGPRRegisterClass)
- RC = ARM::GPRRegisterClass;
-
- switch (RC->getID()) {
- case ARM::GPRRegClassID:
- AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::LDRi12), DestReg)
+ switch (RC->getSize()) {
+ case 4:
+ if (ARM::GPRRegClass.hasSubClassEq(RC)) {
+ AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::LDRi12), DestReg)
.addFrameIndex(FI).addImm(0).addMemOperand(MMO));
- break;
- case ARM::SPRRegClassID:
- AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VLDRS), DestReg)
+
+ } else if (ARM::SPRRegClass.hasSubClassEq(RC)) {
+ AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VLDRS), DestReg)
.addFrameIndex(FI).addImm(0).addMemOperand(MMO));
+ } else
+ llvm_unreachable("Unknown reg class!");
break;
- case ARM::DPRRegClassID:
- case ARM::DPR_VFP2RegClassID:
- case ARM::DPR_8RegClassID:
- AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VLDRD), DestReg)
+ case 8:
+ if (ARM::DPRRegClass.hasSubClassEq(RC)) {
+ AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VLDRD), DestReg)
.addFrameIndex(FI).addImm(0).addMemOperand(MMO));
+ } else
+ llvm_unreachable("Unknown reg class!");
break;
- case ARM::QPRRegClassID:
- case ARM::QPR_VFP2RegClassID:
- case ARM::QPR_8RegClassID:
- if (Align >= 16 && getRegisterInfo().needsStackRealignment(MF)) {
- AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VLD1q64Pseudo), DestReg)
+ case 16:
+ if (ARM::QPRRegClass.hasSubClassEq(RC)) {
+ if (Align >= 16 && getRegisterInfo().needsStackRealignment(MF)) {
+ AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VLD1q64Pseudo), DestReg)
.addFrameIndex(FI).addImm(16)
.addMemOperand(MMO));
- } else {
- AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VLDMQIA), DestReg)
- .addFrameIndex(FI)
- .addMemOperand(MMO));
- }
+ } else {
+ AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VLDMQIA), DestReg)
+ .addFrameIndex(FI)
+ .addMemOperand(MMO));
+ }
+ } else
+ llvm_unreachable("Unknown reg class!");
break;
- case ARM::QQPRRegClassID:
- case ARM::QQPR_VFP2RegClassID:
- if (Align >= 16 && getRegisterInfo().canRealignStack(MF)) {
- AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VLD1d64QPseudo), DestReg)
+ case 32:
+ if (ARM::QQPRRegClass.hasSubClassEq(RC)) {
+ if (Align >= 16 && getRegisterInfo().canRealignStack(MF)) {
+ AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VLD1d64QPseudo), DestReg)
.addFrameIndex(FI).addImm(16)
.addMemOperand(MMO));
- } else {
- MachineInstrBuilder MIB =
+ } else {
+ MachineInstrBuilder MIB =
AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VLDMDIA))
.addFrameIndex(FI))
- .addMemOperand(MMO);
- MIB = AddDReg(MIB, DestReg, ARM::dsub_0, RegState::Define, TRI);
- MIB = AddDReg(MIB, DestReg, ARM::dsub_1, RegState::Define, TRI);
- MIB = AddDReg(MIB, DestReg, ARM::dsub_2, RegState::Define, TRI);
- AddDReg(MIB, DestReg, ARM::dsub_3, RegState::Define, TRI);
- }
+ .addMemOperand(MMO);
+ MIB = AddDReg(MIB, DestReg, ARM::dsub_0, RegState::Define, TRI);
+ MIB = AddDReg(MIB, DestReg, ARM::dsub_1, RegState::Define, TRI);
+ MIB = AddDReg(MIB, DestReg, ARM::dsub_2, RegState::Define, TRI);
+ MIB = AddDReg(MIB, DestReg, ARM::dsub_3, RegState::Define, TRI);
+ MIB.addReg(DestReg, RegState::Define | RegState::Implicit);
+ }
+ } else
+ llvm_unreachable("Unknown reg class!");
break;
- case ARM::QQQQPRRegClassID: {
- MachineInstrBuilder MIB =
+ case 64:
+ if (ARM::QQQQPRRegClass.hasSubClassEq(RC)) {
+ MachineInstrBuilder MIB =
AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VLDMDIA))
.addFrameIndex(FI))
- .addMemOperand(MMO);
- MIB = AddDReg(MIB, DestReg, ARM::dsub_0, RegState::Define, TRI);
- MIB = AddDReg(MIB, DestReg, ARM::dsub_1, RegState::Define, TRI);
- MIB = AddDReg(MIB, DestReg, ARM::dsub_2, RegState::Define, TRI);
- MIB = AddDReg(MIB, DestReg, ARM::dsub_3, RegState::Define, TRI);
- MIB = AddDReg(MIB, DestReg, ARM::dsub_4, RegState::Define, TRI);
- MIB = AddDReg(MIB, DestReg, ARM::dsub_5, RegState::Define, TRI);
- MIB = AddDReg(MIB, DestReg, ARM::dsub_6, RegState::Define, TRI);
- AddDReg(MIB, DestReg, ARM::dsub_7, RegState::Define, TRI);
+ .addMemOperand(MMO);
+ MIB = AddDReg(MIB, DestReg, ARM::dsub_0, RegState::Define, TRI);
+ MIB = AddDReg(MIB, DestReg, ARM::dsub_1, RegState::Define, TRI);
+ MIB = AddDReg(MIB, DestReg, ARM::dsub_2, RegState::Define, TRI);
+ MIB = AddDReg(MIB, DestReg, ARM::dsub_3, RegState::Define, TRI);
+ MIB = AddDReg(MIB, DestReg, ARM::dsub_4, RegState::Define, TRI);
+ MIB = AddDReg(MIB, DestReg, ARM::dsub_5, RegState::Define, TRI);
+ MIB = AddDReg(MIB, DestReg, ARM::dsub_6, RegState::Define, TRI);
+ MIB = AddDReg(MIB, DestReg, ARM::dsub_7, RegState::Define, TRI);
+ MIB.addReg(DestReg, RegState::Define | RegState::Implicit);
+ } else
+ llvm_unreachable("Unknown reg class!");
break;
- }
default:
llvm_unreachable("Unknown regclass!");
}
@@ -944,6 +990,78 @@ ARMBaseInstrInfo::isLoadFromStackSlot(const MachineInstr *MI,
return 0;
}
+unsigned ARMBaseInstrInfo::isLoadFromStackSlotPostFE(const MachineInstr *MI,
+ int &FrameIndex) const {
+ const MachineMemOperand *Dummy;
+ return MI->getDesc().mayLoad() && hasLoadFromStackSlot(MI, Dummy, FrameIndex);
+}
+
+bool ARMBaseInstrInfo::expandPostRAPseudo(MachineBasicBlock::iterator MI) const{
+ // This hook gets to expand COPY instructions before they become
+ // copyPhysReg() calls. Look for VMOVS instructions that can legally be
+ // widened to VMOVD. We prefer the VMOVD when possible because it may be
+ // changed into a VORR that can go down the NEON pipeline.
+ if (!WidenVMOVS || !MI->isCopy())
+ return false;
+
+ // Look for a copy between even S-registers. That is where we keep floats
+ // when using NEON v2f32 instructions for f32 arithmetic.
+ unsigned DstRegS = MI->getOperand(0).getReg();
+ unsigned SrcRegS = MI->getOperand(1).getReg();
+ if (!ARM::SPRRegClass.contains(DstRegS, SrcRegS))
+ return false;
+
+ const TargetRegisterInfo *TRI = &getRegisterInfo();
+ unsigned DstRegD = TRI->getMatchingSuperReg(DstRegS, ARM::ssub_0,
+ &ARM::DPRRegClass);
+ unsigned SrcRegD = TRI->getMatchingSuperReg(SrcRegS, ARM::ssub_0,
+ &ARM::DPRRegClass);
+ if (!DstRegD || !SrcRegD)
+ return false;
+
+ // We want to widen this into a DstRegD = VMOVD SrcRegD copy. This is only
+ // legal if the COPY already defines the full DstRegD, and it isn't a
+ // sub-register insertion.
+ if (!MI->definesRegister(DstRegD, TRI) || MI->readsRegister(DstRegD, TRI))
+ return false;
+
+ // A dead copy shouldn't show up here, but reject it just in case.
+ if (MI->getOperand(0).isDead())
+ return false;
+
+ // All clear, widen the COPY.
+ DEBUG(dbgs() << "widening: " << *MI);
+
+ // Get rid of the old <imp-def> of DstRegD. Leave it if it defines a Q-reg
+ // or some other super-register.
+ int ImpDefIdx = MI->findRegisterDefOperandIdx(DstRegD);
+ if (ImpDefIdx != -1)
+ MI->RemoveOperand(ImpDefIdx);
+
+ // Change the opcode and operands.
+ MI->setDesc(get(ARM::VMOVD));
+ MI->getOperand(0).setReg(DstRegD);
+ MI->getOperand(1).setReg(SrcRegD);
+ AddDefaultPred(MachineInstrBuilder(MI));
+
+ // We are now reading SrcRegD instead of SrcRegS. This may upset the
+ // register scavenger and machine verifier, so we need to indicate that we
+ // are reading an undefined value from SrcRegD, but a proper value from
+ // SrcRegS.
+ MI->getOperand(1).setIsUndef();
+ MachineInstrBuilder(MI).addReg(SrcRegS, RegState::Implicit);
+
+ // SrcRegD may actually contain an unrelated value in the ssub_1
+ // sub-register. Don't kill it. Only kill the ssub_0 sub-register.
+ if (MI->getOperand(1).isKill()) {
+ MI->getOperand(1).setIsKill(false);
+ MI->addRegisterKilled(SrcRegS, TRI, true);
+ }
+
+ DEBUG(dbgs() << "replaced by: " << *MI);
+ return true;
+}
+
MachineInstr*
ARMBaseInstrInfo::emitFrameIndexDebugValue(MachineFunction &MF,
int FrameIx, uint64_t Offset,
@@ -974,17 +1092,24 @@ static unsigned duplicateCPV(MachineFunction &MF, unsigned &CPI) {
// instructions, so that's probably OK, but is PIC always correct when
// we get here?
if (ACPV->isGlobalValue())
- NewCPV = new ARMConstantPoolValue(ACPV->getGV(), PCLabelId,
- ARMCP::CPValue, 4);
+ NewCPV = ARMConstantPoolConstant::
+ Create(cast<ARMConstantPoolConstant>(ACPV)->getGV(), PCLabelId,
+ ARMCP::CPValue, 4);
else if (ACPV->isExtSymbol())
- NewCPV = new ARMConstantPoolValue(MF.getFunction()->getContext(),
- ACPV->getSymbol(), PCLabelId, 4);
+ NewCPV = ARMConstantPoolSymbol::
+ Create(MF.getFunction()->getContext(),
+ cast<ARMConstantPoolSymbol>(ACPV)->getSymbol(), PCLabelId, 4);
else if (ACPV->isBlockAddress())
- NewCPV = new ARMConstantPoolValue(ACPV->getBlockAddress(), PCLabelId,
- ARMCP::CPBlockAddress, 4);
+ NewCPV = ARMConstantPoolConstant::
+ Create(cast<ARMConstantPoolConstant>(ACPV)->getBlockAddress(), PCLabelId,
+ ARMCP::CPBlockAddress, 4);
else if (ACPV->isLSDA())
- NewCPV = new ARMConstantPoolValue(MF.getFunction(), PCLabelId,
- ARMCP::CPLSDA, 4);
+ NewCPV = ARMConstantPoolConstant::Create(MF.getFunction(), PCLabelId,
+ ARMCP::CPLSDA, 4);
+ else if (ACPV->isMachineBasicBlock())
+ NewCPV = ARMConstantPoolMBB::
+ Create(MF.getFunction()->getContext(),
+ cast<ARMConstantPoolMBB>(ACPV)->getMBB(), PCLabelId, 4);
else
llvm_unreachable("Unexpected ARM constantpool value type!!");
CPI = MCP->getConstantPoolIndex(NewCPV, MCPE.getAlignment());
@@ -1289,7 +1414,7 @@ isProfitableToIfCvt(MachineBasicBlock &TMBB,
// Attempt to estimate the relative costs of predication versus branching.
unsigned TUnpredCost = Probability.getNumerator() * TCycles;
TUnpredCost /= Probability.getDenominator();
-
+
uint32_t Comp = Probability.getDenominator() - Probability.getNumerator();
unsigned FUnpredCost = Comp * FCycles;
FUnpredCost /= Probability.getDenominator();
@@ -1330,6 +1455,57 @@ int llvm::getMatchingCondBranchOpcode(int Opc) {
}
+/// Map pseudo instructions that imply an 'S' bit onto real opcodes. Whether the
+/// instruction is encoded with an 'S' bit is determined by the optional CPSR
+/// def operand.
+///
+/// This will go away once we can teach tblgen how to set the optional CPSR def
+/// operand itself.
+struct AddSubFlagsOpcodePair {
+ unsigned PseudoOpc;
+ unsigned MachineOpc;
+};
+
+static AddSubFlagsOpcodePair AddSubFlagsOpcodeMap[] = {
+ {ARM::ADDSri, ARM::ADDri},
+ {ARM::ADDSrr, ARM::ADDrr},
+ {ARM::ADDSrsi, ARM::ADDrsi},
+ {ARM::ADDSrsr, ARM::ADDrsr},
+
+ {ARM::SUBSri, ARM::SUBri},
+ {ARM::SUBSrr, ARM::SUBrr},
+ {ARM::SUBSrsi, ARM::SUBrsi},
+ {ARM::SUBSrsr, ARM::SUBrsr},
+
+ {ARM::RSBSri, ARM::RSBri},
+ {ARM::RSBSrr, ARM::RSBrr},
+ {ARM::RSBSrsi, ARM::RSBrsi},
+ {ARM::RSBSrsr, ARM::RSBrsr},
+
+ {ARM::t2ADDSri, ARM::t2ADDri},
+ {ARM::t2ADDSrr, ARM::t2ADDrr},
+ {ARM::t2ADDSrs, ARM::t2ADDrs},
+
+ {ARM::t2SUBSri, ARM::t2SUBri},
+ {ARM::t2SUBSrr, ARM::t2SUBrr},
+ {ARM::t2SUBSrs, ARM::t2SUBrs},
+
+ {ARM::t2RSBSri, ARM::t2RSBri},
+ {ARM::t2RSBSrs, ARM::t2RSBrs},
+};
+
+unsigned llvm::convertAddSubFlagsOpcode(unsigned OldOpc) {
+ static const int NPairs =
+ sizeof(AddSubFlagsOpcodeMap) / sizeof(AddSubFlagsOpcodePair);
+ for (AddSubFlagsOpcodePair *OpcPair = &AddSubFlagsOpcodeMap[0],
+ *End = &AddSubFlagsOpcodeMap[NPairs]; OpcPair != End; ++OpcPair) {
+ if (OldOpc == OpcPair->PseudoOpc) {
+ return OpcPair->MachineOpc;
+ }
+ }
+ return 0;
+}
+
void llvm::emitARMRegPlusImmediate(MachineBasicBlock &MBB,
MachineBasicBlock::iterator &MBBI, DebugLoc dl,
unsigned DestReg, unsigned BaseReg, int NumBytes,
@@ -1862,7 +2038,6 @@ ARMBaseInstrInfo::getNumMicroOps(const InstrItineraryData *ItinData,
case ARM::STMIB_UPD:
case ARM::tLDMIA:
case ARM::tLDMIA_UPD:
- case ARM::tSTMIA:
case ARM::tSTMIA_UPD:
case ARM::tPOP_RET:
case ARM::tPOP:
@@ -2128,7 +2303,6 @@ ARMBaseInstrInfo::getOperandLatency(const InstrItineraryData *ItinData,
case ARM::STMDA_UPD:
case ARM::STMDB_UPD:
case ARM::STMIB_UPD:
- case ARM::tSTMIA:
case ARM::tSTMIA_UPD:
case ARM::tPOP_RET:
case ARM::tPOP:
@@ -2567,6 +2741,15 @@ hasLowDefLatency(const InstrItineraryData *ItinData,
return false;
}
+bool ARMBaseInstrInfo::verifyInstruction(const MachineInstr *MI,
+ StringRef &ErrInfo) const {
+ if (convertAddSubFlagsOpcode(MI->getOpcode())) {
+ ErrInfo = "Pseudo flag setting opcodes only exist in Selection DAG";
+ return false;
+ }
+ return true;
+}
+
bool
ARMBaseInstrInfo::isFpMLxInstruction(unsigned Opcode, unsigned &MulOpc,
unsigned &AddSubOpc,
@@ -2582,3 +2765,66 @@ ARMBaseInstrInfo::isFpMLxInstruction(unsigned Opcode, unsigned &MulOpc,
HasLane = Entry.HasLane;
return true;
}
+
+//===----------------------------------------------------------------------===//
+// Execution domains.
+//===----------------------------------------------------------------------===//
+//
+// Some instructions go down the NEON pipeline, some go down the VFP pipeline,
+// and some can go down both. The vmov instructions go down the VFP pipeline,
+// but they can be changed to vorr equivalents that are executed by the NEON
+// pipeline.
+//
+// We use the following execution domain numbering:
+//
+enum ARMExeDomain {
+ ExeGeneric = 0,
+ ExeVFP = 1,
+ ExeNEON = 2
+};
+//
+// Also see ARMInstrFormats.td and Domain* enums in ARMBaseInfo.h
+//
+std::pair<uint16_t, uint16_t>
+ARMBaseInstrInfo::getExecutionDomain(const MachineInstr *MI) const {
+ // VMOVD is a VFP instruction, but can be changed to NEON if it isn't
+ // predicated.
+ if (MI->getOpcode() == ARM::VMOVD && !isPredicated(MI))
+ return std::make_pair(ExeVFP, (1<<ExeVFP) | (1<<ExeNEON));
+
+ // No other instructions can be swizzled, so just determine their domain.
+ unsigned Domain = MI->getDesc().TSFlags & ARMII::DomainMask;
+
+ if (Domain & ARMII::DomainNEON)
+ return std::make_pair(ExeNEON, 0);
+
+ // Certain instructions can go either way on Cortex-A8.
+ // Treat them as NEON instructions.
+ if ((Domain & ARMII::DomainNEONA8) && Subtarget.isCortexA8())
+ return std::make_pair(ExeNEON, 0);
+
+ if (Domain & ARMII::DomainVFP)
+ return std::make_pair(ExeVFP, 0);
+
+ return std::make_pair(ExeGeneric, 0);
+}
+
+void
+ARMBaseInstrInfo::setExecutionDomain(MachineInstr *MI, unsigned Domain) const {
+ // We only know how to change VMOVD into VORR.
+ assert(MI->getOpcode() == ARM::VMOVD && "Can only swizzle VMOVD");
+ if (Domain != ExeNEON)
+ return;
+
+ // Zap the predicate operands.
+ assert(!isPredicated(MI) && "Cannot predicate a VORRd");
+ MI->RemoveOperand(3);
+ MI->RemoveOperand(2);
+
+ // Change to a VORRd which requires two identical use operands.
+ MI->setDesc(get(ARM::VORRd));
+
+ // Add the extra source operand and new predicates.
+ // This will go before any implicit ops.
+ AddDefaultPred(MachineInstrBuilder(MI).addOperand(MI->getOperand(1)));
+}
diff --git a/contrib/llvm/lib/Target/ARM/ARMBaseInstrInfo.h b/contrib/llvm/lib/Target/ARM/ARMBaseInstrInfo.h
index 507e897..0f9f321 100644
--- a/contrib/llvm/lib/Target/ARM/ARMBaseInstrInfo.h
+++ b/contrib/llvm/lib/Target/ARM/ARMBaseInstrInfo.h
@@ -27,146 +27,6 @@ namespace llvm {
class ARMSubtarget;
class ARMBaseRegisterInfo;
-/// ARMII - This namespace holds all of the target specific flags that
-/// instruction info tracks.
-///
-namespace ARMII {
- enum {
- //===------------------------------------------------------------------===//
- // Instruction Flags.
-
- //===------------------------------------------------------------------===//
- // This four-bit field describes the addressing mode used.
- AddrModeMask = 0x1f, // The AddrMode enums are declared in ARMBaseInfo.h
-
- // IndexMode - Unindex, pre-indexed, or post-indexed are valid for load
- // and store ops only. Generic "updating" flag is used for ld/st multiple.
- // The index mode enums are declared in ARMBaseInfo.h
- IndexModeShift = 5,
- IndexModeMask = 3 << IndexModeShift,
-
- //===------------------------------------------------------------------===//
- // Instruction encoding formats.
- //
- FormShift = 7,
- FormMask = 0x3f << FormShift,
-
- // Pseudo instructions
- Pseudo = 0 << FormShift,
-
- // Multiply instructions
- MulFrm = 1 << FormShift,
-
- // Branch instructions
- BrFrm = 2 << FormShift,
- BrMiscFrm = 3 << FormShift,
-
- // Data Processing instructions
- DPFrm = 4 << FormShift,
- DPSoRegFrm = 5 << FormShift,
-
- // Load and Store
- LdFrm = 6 << FormShift,
- StFrm = 7 << FormShift,
- LdMiscFrm = 8 << FormShift,
- StMiscFrm = 9 << FormShift,
- LdStMulFrm = 10 << FormShift,
-
- LdStExFrm = 11 << FormShift,
-
- // Miscellaneous arithmetic instructions
- ArithMiscFrm = 12 << FormShift,
- SatFrm = 13 << FormShift,
-
- // Extend instructions
- ExtFrm = 14 << FormShift,
-
- // VFP formats
- VFPUnaryFrm = 15 << FormShift,
- VFPBinaryFrm = 16 << FormShift,
- VFPConv1Frm = 17 << FormShift,
- VFPConv2Frm = 18 << FormShift,
- VFPConv3Frm = 19 << FormShift,
- VFPConv4Frm = 20 << FormShift,
- VFPConv5Frm = 21 << FormShift,
- VFPLdStFrm = 22 << FormShift,
- VFPLdStMulFrm = 23 << FormShift,
- VFPMiscFrm = 24 << FormShift,
-
- // Thumb format
- ThumbFrm = 25 << FormShift,
-
- // Miscelleaneous format
- MiscFrm = 26 << FormShift,
-
- // NEON formats
- NGetLnFrm = 27 << FormShift,
- NSetLnFrm = 28 << FormShift,
- NDupFrm = 29 << FormShift,
- NLdStFrm = 30 << FormShift,
- N1RegModImmFrm= 31 << FormShift,
- N2RegFrm = 32 << FormShift,
- NVCVTFrm = 33 << FormShift,
- NVDupLnFrm = 34 << FormShift,
- N2RegVShLFrm = 35 << FormShift,
- N2RegVShRFrm = 36 << FormShift,
- N3RegFrm = 37 << FormShift,
- N3RegVShFrm = 38 << FormShift,
- NVExtFrm = 39 << FormShift,
- NVMulSLFrm = 40 << FormShift,
- NVTBLFrm = 41 << FormShift,
-
- //===------------------------------------------------------------------===//
- // Misc flags.
-
- // UnaryDP - Indicates this is a unary data processing instruction, i.e.
- // it doesn't have a Rn operand.
- UnaryDP = 1 << 13,
-
- // Xform16Bit - Indicates this Thumb2 instruction may be transformed into
- // a 16-bit Thumb instruction if certain conditions are met.
- Xform16Bit = 1 << 14,
-
- //===------------------------------------------------------------------===//
- // Code domain.
- DomainShift = 15,
- DomainMask = 7 << DomainShift,
- DomainGeneral = 0 << DomainShift,
- DomainVFP = 1 << DomainShift,
- DomainNEON = 2 << DomainShift,
- DomainNEONA8 = 4 << DomainShift,
-
- //===------------------------------------------------------------------===//
- // Field shifts - such shifts are used to set field while generating
- // machine instructions.
- //
- // FIXME: This list will need adjusting/fixing as the MC code emitter
- // takes shape and the ARMCodeEmitter.cpp bits go away.
- ShiftTypeShift = 4,
-
- M_BitShift = 5,
- ShiftImmShift = 5,
- ShiftShift = 7,
- N_BitShift = 7,
- ImmHiShift = 8,
- SoRotImmShift = 8,
- RegRsShift = 8,
- ExtRotImmShift = 10,
- RegRdLoShift = 12,
- RegRdShift = 12,
- RegRdHiShift = 16,
- RegRnShift = 16,
- S_BitShift = 20,
- W_BitShift = 21,
- AM3_I_BitShift = 22,
- D_BitShift = 22,
- U_BitShift = 23,
- P_BitShift = 24,
- I_BitShift = 25,
- CondShift = 28
- };
-}
-
class ARMBaseInstrInfo : public ARMGenInstrInfo {
const ARMSubtarget &Subtarget;
@@ -241,6 +101,10 @@ public:
int &FrameIndex) const;
virtual unsigned isStoreToStackSlot(const MachineInstr *MI,
int &FrameIndex) const;
+ virtual unsigned isLoadFromStackSlotPostFE(const MachineInstr *MI,
+ int &FrameIndex) const;
+ virtual unsigned isStoreToStackSlotPostFE(const MachineInstr *MI,
+ int &FrameIndex) const;
virtual void copyPhysReg(MachineBasicBlock &MBB,
MachineBasicBlock::iterator I, DebugLoc DL,
@@ -259,6 +123,8 @@ public:
const TargetRegisterClass *RC,
const TargetRegisterInfo *TRI) const;
+ virtual bool expandPostRAPseudo(MachineBasicBlock::iterator MI) const;
+
virtual MachineInstr *emitFrameIndexDebugValue(MachineFunction &MF,
int FrameIx,
uint64_t Offset,
@@ -346,6 +212,12 @@ public:
int getOperandLatency(const InstrItineraryData *ItinData,
SDNode *DefNode, unsigned DefIdx,
SDNode *UseNode, unsigned UseIdx) const;
+
+ /// VFP/NEON execution domains.
+ std::pair<uint16_t, uint16_t>
+ getExecutionDomain(const MachineInstr *MI) const;
+ void setExecutionDomain(MachineInstr *MI, unsigned Domain) const;
+
private:
int getVLDMDefCycle(const InstrItineraryData *ItinData,
const MCInstrDesc &DefMCID,
@@ -382,6 +254,9 @@ private:
bool hasLowDefLatency(const InstrItineraryData *ItinData,
const MachineInstr *DefMI, unsigned DefIdx) const;
+ /// verifyInstruction - Perform target specific instruction verification.
+ bool verifyInstruction(const MachineInstr *MI, StringRef &ErrInfo) const;
+
private:
/// Modeling special VFP / NEON fp MLA / MLS hazards.
@@ -464,6 +339,12 @@ ARMCC::CondCodes getInstrPredicate(const MachineInstr *MI, unsigned &PredReg);
int getMatchingCondBranchOpcode(int Opc);
+
+/// Map pseudo instructions that imply an 'S' bit onto real opcodes. Whether
+/// the instruction is encoded with an 'S' bit is determined by the optional
+/// CPSR def operand.
+unsigned convertAddSubFlagsOpcode(unsigned OldOpc);
+
/// emitARMRegPlusImmediate / emitT2RegPlusImmediate - Emits a series of
/// instructions to materializea destreg = basereg + immediate in ARM / Thumb2
/// code.
diff --git a/contrib/llvm/lib/Target/ARM/ARMBaseRegisterInfo.cpp b/contrib/llvm/lib/Target/ARM/ARMBaseRegisterInfo.cpp
index ba42295..7c42342 100644
--- a/contrib/llvm/lib/Target/ARM/ARMBaseRegisterInfo.cpp
+++ b/contrib/llvm/lib/Target/ARM/ARMBaseRegisterInfo.cpp
@@ -12,13 +12,13 @@
//===----------------------------------------------------------------------===//
#include "ARM.h"
-#include "ARMAddressingModes.h"
#include "ARMBaseInstrInfo.h"
#include "ARMBaseRegisterInfo.h"
#include "ARMFrameLowering.h"
#include "ARMInstrInfo.h"
#include "ARMMachineFunctionInfo.h"
#include "ARMSubtarget.h"
+#include "MCTargetDesc/ARMAddressingModes.h"
#include "llvm/Constants.h"
#include "llvm/DerivedTypes.h"
#include "llvm/Function.h"
@@ -27,7 +27,6 @@
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
-#include "llvm/CodeGen/MachineLocation.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/RegisterScavenging.h"
#include "llvm/Support/Debug.h"
@@ -57,7 +56,7 @@ EnableBasePointer("arm-use-base-pointer", cl::Hidden, cl::init(true),
ARMBaseRegisterInfo::ARMBaseRegisterInfo(const ARMBaseInstrInfo &tii,
const ARMSubtarget &sti)
- : ARMGenRegisterInfo(), TII(tii), STI(sti),
+ : ARMGenRegisterInfo(ARM::LR), TII(tii), STI(sti),
FramePtr((STI.isTargetDarwin() || STI.isThumb()) ? ARM::R7 : ARM::R11),
BasePtr(ARM::R6) {
}
@@ -354,7 +353,7 @@ const TargetRegisterClass*
ARMBaseRegisterInfo::getLargestLegalSuperClass(const TargetRegisterClass *RC)
const {
const TargetRegisterClass *Super = RC;
- TargetRegisterClass::sc_iterator I = RC->superclasses_begin();
+ TargetRegisterClass::sc_iterator I = RC->getSuperClasses();
do {
switch (Super->getID()) {
case ARM::GPRRegClassID:
@@ -375,6 +374,13 @@ ARMBaseRegisterInfo::getPointerRegClass(unsigned Kind) const {
return ARM::GPRRegisterClass;
}
+const TargetRegisterClass *
+ARMBaseRegisterInfo::getCrossCopyRegClass(const TargetRegisterClass *RC) const {
+ if (RC == &ARM::CCRRegClass)
+ return 0; // Can't copy CCR registers.
+ return RC;
+}
+
unsigned
ARMBaseRegisterInfo::getRegPressureLimit(const TargetRegisterClass *RC,
MachineFunction &MF) const {
@@ -487,19 +493,19 @@ ARMBaseRegisterInfo::getRawAllocationOrder(const TargetRegisterClass *RC,
if (!TFI->hasFP(MF)) {
if (!STI.isR9Reserved())
- return ArrayRef<unsigned>(GPREven1);
+ return makeArrayRef(GPREven1);
else
- return ArrayRef<unsigned>(GPREven4);
+ return makeArrayRef(GPREven4);
} else if (FramePtr == ARM::R7) {
if (!STI.isR9Reserved())
- return ArrayRef<unsigned>(GPREven2);
+ return makeArrayRef(GPREven2);
else
- return ArrayRef<unsigned>(GPREven5);
+ return makeArrayRef(GPREven5);
} else { // FramePtr == ARM::R11
if (!STI.isR9Reserved())
- return ArrayRef<unsigned>(GPREven3);
+ return makeArrayRef(GPREven3);
else
- return ArrayRef<unsigned>(GPREven6);
+ return makeArrayRef(GPREven6);
}
} else if (HintType == ARMRI::RegPairOdd) {
if (isPhysicalRegister(HintReg) && getRegisterPairOdd(HintReg, MF) == 0)
@@ -509,19 +515,19 @@ ARMBaseRegisterInfo::getRawAllocationOrder(const TargetRegisterClass *RC,
if (!TFI->hasFP(MF)) {
if (!STI.isR9Reserved())
- return ArrayRef<unsigned>(GPROdd1);
+ return makeArrayRef(GPROdd1);
else
- return ArrayRef<unsigned>(GPROdd4);
+ return makeArrayRef(GPROdd4);
} else if (FramePtr == ARM::R7) {
if (!STI.isR9Reserved())
- return ArrayRef<unsigned>(GPROdd2);
+ return makeArrayRef(GPROdd2);
else
- return ArrayRef<unsigned>(GPROdd5);
+ return makeArrayRef(GPROdd5);
} else { // FramePtr == ARM::R11
if (!STI.isR9Reserved())
- return ArrayRef<unsigned>(GPROdd3);
+ return makeArrayRef(GPROdd3);
else
- return ArrayRef<unsigned>(GPROdd6);
+ return makeArrayRef(GPROdd6);
}
}
return RC->getRawAllocationOrder(MF);
@@ -649,10 +655,6 @@ cannotEliminateFrame(const MachineFunction &MF) const {
|| needsStackRealignment(MF);
}
-unsigned ARMBaseRegisterInfo::getRARegister() const {
- return ARM::LR;
-}
-
unsigned
ARMBaseRegisterInfo::getFrameRegister(const MachineFunction &MF) const {
const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering();
@@ -672,99 +674,54 @@ unsigned ARMBaseRegisterInfo::getEHHandlerRegister() const {
return 0;
}
-int ARMBaseRegisterInfo::getDwarfRegNum(unsigned RegNum, bool isEH) const {
- return ARMGenRegisterInfo::getDwarfRegNumFull(RegNum, 0);
-}
-
-int ARMBaseRegisterInfo::getLLVMRegNum(unsigned DwarfRegNo, bool isEH) const {
- return ARMGenRegisterInfo::getLLVMRegNumFull(DwarfRegNo,0);
-}
-
unsigned ARMBaseRegisterInfo::getRegisterPairEven(unsigned Reg,
const MachineFunction &MF) const {
switch (Reg) {
default: break;
// Return 0 if either register of the pair is a special register.
// So no R12, etc.
- case ARM::R1:
- return ARM::R0;
- case ARM::R3:
- return ARM::R2;
- case ARM::R5:
- return ARM::R4;
+ case ARM::R1: return ARM::R0;
+ case ARM::R3: return ARM::R2;
+ case ARM::R5: return ARM::R4;
case ARM::R7:
return (isReservedReg(MF, ARM::R7) || isReservedReg(MF, ARM::R6))
? 0 : ARM::R6;
- case ARM::R9:
- return isReservedReg(MF, ARM::R9) ? 0 :ARM::R8;
- case ARM::R11:
- return isReservedReg(MF, ARM::R11) ? 0 : ARM::R10;
-
- case ARM::S1:
- return ARM::S0;
- case ARM::S3:
- return ARM::S2;
- case ARM::S5:
- return ARM::S4;
- case ARM::S7:
- return ARM::S6;
- case ARM::S9:
- return ARM::S8;
- case ARM::S11:
- return ARM::S10;
- case ARM::S13:
- return ARM::S12;
- case ARM::S15:
- return ARM::S14;
- case ARM::S17:
- return ARM::S16;
- case ARM::S19:
- return ARM::S18;
- case ARM::S21:
- return ARM::S20;
- case ARM::S23:
- return ARM::S22;
- case ARM::S25:
- return ARM::S24;
- case ARM::S27:
- return ARM::S26;
- case ARM::S29:
- return ARM::S28;
- case ARM::S31:
- return ARM::S30;
-
- case ARM::D1:
- return ARM::D0;
- case ARM::D3:
- return ARM::D2;
- case ARM::D5:
- return ARM::D4;
- case ARM::D7:
- return ARM::D6;
- case ARM::D9:
- return ARM::D8;
- case ARM::D11:
- return ARM::D10;
- case ARM::D13:
- return ARM::D12;
- case ARM::D15:
- return ARM::D14;
- case ARM::D17:
- return ARM::D16;
- case ARM::D19:
- return ARM::D18;
- case ARM::D21:
- return ARM::D20;
- case ARM::D23:
- return ARM::D22;
- case ARM::D25:
- return ARM::D24;
- case ARM::D27:
- return ARM::D26;
- case ARM::D29:
- return ARM::D28;
- case ARM::D31:
- return ARM::D30;
+ case ARM::R9: return isReservedReg(MF, ARM::R9) ? 0 :ARM::R8;
+ case ARM::R11: return isReservedReg(MF, ARM::R11) ? 0 : ARM::R10;
+
+ case ARM::S1: return ARM::S0;
+ case ARM::S3: return ARM::S2;
+ case ARM::S5: return ARM::S4;
+ case ARM::S7: return ARM::S6;
+ case ARM::S9: return ARM::S8;
+ case ARM::S11: return ARM::S10;
+ case ARM::S13: return ARM::S12;
+ case ARM::S15: return ARM::S14;
+ case ARM::S17: return ARM::S16;
+ case ARM::S19: return ARM::S18;
+ case ARM::S21: return ARM::S20;
+ case ARM::S23: return ARM::S22;
+ case ARM::S25: return ARM::S24;
+ case ARM::S27: return ARM::S26;
+ case ARM::S29: return ARM::S28;
+ case ARM::S31: return ARM::S30;
+
+ case ARM::D1: return ARM::D0;
+ case ARM::D3: return ARM::D2;
+ case ARM::D5: return ARM::D4;
+ case ARM::D7: return ARM::D6;
+ case ARM::D9: return ARM::D8;
+ case ARM::D11: return ARM::D10;
+ case ARM::D13: return ARM::D12;
+ case ARM::D15: return ARM::D14;
+ case ARM::D17: return ARM::D16;
+ case ARM::D19: return ARM::D18;
+ case ARM::D21: return ARM::D20;
+ case ARM::D23: return ARM::D22;
+ case ARM::D25: return ARM::D24;
+ case ARM::D27: return ARM::D26;
+ case ARM::D29: return ARM::D28;
+ case ARM::D31: return ARM::D30;
}
return 0;
@@ -776,85 +733,48 @@ unsigned ARMBaseRegisterInfo::getRegisterPairOdd(unsigned Reg,
default: break;
// Return 0 if either register of the pair is a special register.
// So no R12, etc.
- case ARM::R0:
- return ARM::R1;
- case ARM::R2:
- return ARM::R3;
- case ARM::R4:
- return ARM::R5;
+ case ARM::R0: return ARM::R1;
+ case ARM::R2: return ARM::R3;
+ case ARM::R4: return ARM::R5;
case ARM::R6:
return (isReservedReg(MF, ARM::R7) || isReservedReg(MF, ARM::R6))
? 0 : ARM::R7;
- case ARM::R8:
- return isReservedReg(MF, ARM::R9) ? 0 :ARM::R9;
- case ARM::R10:
- return isReservedReg(MF, ARM::R11) ? 0 : ARM::R11;
-
- case ARM::S0:
- return ARM::S1;
- case ARM::S2:
- return ARM::S3;
- case ARM::S4:
- return ARM::S5;
- case ARM::S6:
- return ARM::S7;
- case ARM::S8:
- return ARM::S9;
- case ARM::S10:
- return ARM::S11;
- case ARM::S12:
- return ARM::S13;
- case ARM::S14:
- return ARM::S15;
- case ARM::S16:
- return ARM::S17;
- case ARM::S18:
- return ARM::S19;
- case ARM::S20:
- return ARM::S21;
- case ARM::S22:
- return ARM::S23;
- case ARM::S24:
- return ARM::S25;
- case ARM::S26:
- return ARM::S27;
- case ARM::S28:
- return ARM::S29;
- case ARM::S30:
- return ARM::S31;
-
- case ARM::D0:
- return ARM::D1;
- case ARM::D2:
- return ARM::D3;
- case ARM::D4:
- return ARM::D5;
- case ARM::D6:
- return ARM::D7;
- case ARM::D8:
- return ARM::D9;
- case ARM::D10:
- return ARM::D11;
- case ARM::D12:
- return ARM::D13;
- case ARM::D14:
- return ARM::D15;
- case ARM::D16:
- return ARM::D17;
- case ARM::D18:
- return ARM::D19;
- case ARM::D20:
- return ARM::D21;
- case ARM::D22:
- return ARM::D23;
- case ARM::D24:
- return ARM::D25;
- case ARM::D26:
- return ARM::D27;
- case ARM::D28:
- return ARM::D29;
- case ARM::D30:
- return ARM::D31;
+ case ARM::R8: return isReservedReg(MF, ARM::R9) ? 0 :ARM::R9;
+ case ARM::R10: return isReservedReg(MF, ARM::R11) ? 0 : ARM::R11;
+
+ case ARM::S0: return ARM::S1;
+ case ARM::S2: return ARM::S3;
+ case ARM::S4: return ARM::S5;
+ case ARM::S6: return ARM::S7;
+ case ARM::S8: return ARM::S9;
+ case ARM::S10: return ARM::S11;
+ case ARM::S12: return ARM::S13;
+ case ARM::S14: return ARM::S15;
+ case ARM::S16: return ARM::S17;
+ case ARM::S18: return ARM::S19;
+ case ARM::S20: return ARM::S21;
+ case ARM::S22: return ARM::S23;
+ case ARM::S24: return ARM::S25;
+ case ARM::S26: return ARM::S27;
+ case ARM::S28: return ARM::S29;
+ case ARM::S30: return ARM::S31;
+
+ case ARM::D0: return ARM::D1;
+ case ARM::D2: return ARM::D3;
+ case ARM::D4: return ARM::D5;
+ case ARM::D6: return ARM::D7;
+ case ARM::D8: return ARM::D9;
+ case ARM::D10: return ARM::D11;
+ case ARM::D12: return ARM::D13;
+ case ARM::D14: return ARM::D15;
+ case ARM::D16: return ARM::D17;
+ case ARM::D18: return ARM::D19;
+ case ARM::D20: return ARM::D21;
+ case ARM::D22: return ARM::D23;
+ case ARM::D24: return ARM::D25;
+ case ARM::D26: return ARM::D27;
+ case ARM::D28: return ARM::D29;
+ case ARM::D30: return ARM::D31;
}
return 0;
@@ -1111,11 +1031,11 @@ materializeFrameBaseRegister(MachineBasicBlock *MBB,
MachineRegisterInfo &MRI = MBB->getParent()->getRegInfo();
MRI.constrainRegClass(BaseReg, TII.getRegClass(MCID, 0, this));
- MachineInstrBuilder MIB = BuildMI(*MBB, Ins, DL, MCID, BaseReg)
- .addFrameIndex(FrameIdx).addImm(Offset);
+ MachineInstrBuilder MIB = AddDefaultPred(BuildMI(*MBB, Ins, DL, MCID, BaseReg)
+ .addFrameIndex(FrameIdx).addImm(Offset));
if (!AFI->isThumb1OnlyFunction())
- AddDefaultCC(AddDefaultPred(MIB));
+ AddDefaultCC(MIB);
}
void
@@ -1143,6 +1063,7 @@ ARMBaseRegisterInfo::resolveFrameIndex(MachineBasicBlock::iterator I,
Done = rewriteT2FrameIndex(MI, i, BaseReg, Off, TII);
}
assert (Done && "Unable to resolve frame index!");
+ (void)Done;
}
bool ARMBaseRegisterInfo::isFrameOffsetLegal(const MachineInstr *MI,
diff --git a/contrib/llvm/lib/Target/ARM/ARMBaseRegisterInfo.h b/contrib/llvm/lib/Target/ARM/ARMBaseRegisterInfo.h
index b4b4059..fee17ff 100644
--- a/contrib/llvm/lib/Target/ARM/ARMBaseRegisterInfo.h
+++ b/contrib/llvm/lib/Target/ARM/ARMBaseRegisterInfo.h
@@ -33,19 +33,6 @@ namespace ARMRI {
};
}
-/// isARMLowRegister - Returns true if the register is low register r0-r7.
-///
-static inline bool isARMLowRegister(unsigned Reg) {
- using namespace ARM;
- switch (Reg) {
- case R0: case R1: case R2: case R3:
- case R4: case R5: case R6: case R7:
- return true;
- default:
- return false;
- }
-}
-
/// isARMArea1Register - Returns true if the register is a low register (r0-r7)
/// or a stack/pc register that we should push/pop.
static inline bool isARMArea1Register(unsigned Reg, bool isDarwin) {
@@ -129,6 +116,8 @@ public:
unsigned &NewSubIdx) const;
const TargetRegisterClass *getPointerRegClass(unsigned Kind = 0) const;
+ const TargetRegisterClass*
+ getCrossCopyRegClass(const TargetRegisterClass *RC) const;
const TargetRegisterClass*
getLargestLegalSuperClass(const TargetRegisterClass *RC) const;
@@ -164,7 +153,6 @@ public:
bool cannotEliminateFrame(const MachineFunction &MF) const;
// Debug information queries.
- unsigned getRARegister() const;
unsigned getFrameRegister(const MachineFunction &MF) const;
unsigned getBaseRegister() const { return BasePtr; }
@@ -172,9 +160,6 @@ public:
unsigned getEHExceptionRegister() const;
unsigned getEHHandlerRegister() const;
- int getDwarfRegNum(unsigned RegNum, bool isEH) const;
- int getLLVMRegNum(unsigned RegNum, bool isEH) const;
-
bool isLowRegister(unsigned Reg) const;
diff --git a/contrib/llvm/lib/Target/ARM/ARMCodeEmitter.cpp b/contrib/llvm/lib/Target/ARM/ARMCodeEmitter.cpp
index d6fca62..4148d4a 100644
--- a/contrib/llvm/lib/Target/ARM/ARMCodeEmitter.cpp
+++ b/contrib/llvm/lib/Target/ARM/ARMCodeEmitter.cpp
@@ -14,12 +14,12 @@
#define DEBUG_TYPE "jit"
#include "ARM.h"
-#include "ARMAddressingModes.h"
#include "ARMConstantPoolValue.h"
#include "ARMInstrInfo.h"
#include "ARMRelocations.h"
#include "ARMSubtarget.h"
#include "ARMTargetMachine.h"
+#include "MCTargetDesc/ARMAddressingModes.h"
#include "llvm/Constants.h"
#include "llvm/DerivedTypes.h"
#include "llvm/Function.h"
@@ -161,11 +161,11 @@ namespace {
// are already handled elsewhere. They are placeholders to allow this
// encoder to continue to function until the MC encoder is sufficiently
// far along that this one can be eliminated entirely.
- unsigned NEONThumb2DataIPostEncoder(const MachineInstr &MI, unsigned Val)
+ unsigned NEONThumb2DataIPostEncoder(const MachineInstr &MI, unsigned Val)
const { return 0; }
- unsigned NEONThumb2LoadStorePostEncoder(const MachineInstr &MI,unsigned Val)
+ unsigned NEONThumb2LoadStorePostEncoder(const MachineInstr &MI,unsigned Val)
const { return 0; }
- unsigned NEONThumb2DupPostEncoder(const MachineInstr &MI,unsigned Val)
+ unsigned NEONThumb2DupPostEncoder(const MachineInstr &MI,unsigned Val)
const { return 0; }
unsigned VFPThumb2PostEncoder(const MachineInstr&MI, unsigned Val)
const { return 0; }
@@ -189,13 +189,17 @@ namespace {
unsigned Op) const { return 0; }
unsigned getARMBranchTargetOpValue(const MachineInstr &MI, unsigned Op)
const { return 0; }
+ unsigned getARMBLXTargetOpValue(const MachineInstr &MI, unsigned Op)
+ const { return 0; }
unsigned getCCOutOpValue(const MachineInstr &MI, unsigned Op)
const { return 0; }
unsigned getSOImmOpValue(const MachineInstr &MI, unsigned Op)
const { return 0; }
unsigned getT2SOImmOpValue(const MachineInstr &MI, unsigned Op)
const { return 0; }
- unsigned getSORegOpValue(const MachineInstr &MI, unsigned Op)
+ unsigned getSORegRegOpValue(const MachineInstr &MI, unsigned Op)
+ const { return 0; }
+ unsigned getSORegImmOpValue(const MachineInstr &MI, unsigned Op)
const { return 0; }
unsigned getThumbAddrModeRegRegOpValue(const MachineInstr &MI, unsigned Op)
const { return 0; }
@@ -203,8 +207,12 @@ namespace {
const { return 0; }
unsigned getT2AddrModeImm8OpValue(const MachineInstr &MI, unsigned Op)
const { return 0; }
+ unsigned getT2Imm8s4OpValue(const MachineInstr &MI, unsigned Op)
+ const { return 0; }
unsigned getT2AddrModeImm8s4OpValue(const MachineInstr &MI, unsigned Op)
const { return 0; }
+ unsigned getT2AddrModeImm0_1020s4OpValue(const MachineInstr &MI,unsigned Op)
+ const { return 0; }
unsigned getT2AddrModeImm8OffsetOpValue(const MachineInstr &MI, unsigned Op)
const { return 0; }
unsigned getT2AddrModeImm12OffsetOpValue(const MachineInstr &MI,unsigned Op)
@@ -213,10 +221,6 @@ namespace {
const { return 0; }
unsigned getT2SORegOpValue(const MachineInstr &MI, unsigned Op)
const { return 0; }
- unsigned getRotImmOpValue(const MachineInstr &MI, unsigned Op)
- const { return 0; }
- unsigned getImmMinusOneOpValue(const MachineInstr &MI, unsigned Op)
- const { return 0; }
unsigned getT2AdrLabelOpValue(const MachineInstr &MI, unsigned Op)
const { return 0; }
unsigned getAddrMode6AddressOpValue(const MachineInstr &MI, unsigned Op)
@@ -230,8 +234,6 @@ namespace {
const { return 0; }
unsigned getBitfieldInvertedMaskOpValue(const MachineInstr &MI,
unsigned Op) const { return 0; }
- unsigned getMsbOpValue(const MachineInstr &MI,
- unsigned Op) const { return 0; }
unsigned getSsatBitPosValue(const MachineInstr &MI,
unsigned Op) const { return 0; }
uint32_t getLdStmModeOpValue(const MachineInstr &MI, unsigned OpIdx)
@@ -268,6 +270,8 @@ namespace {
const { return 0;}
uint32_t getAddrMode2OffsetOpValue(const MachineInstr &MI, unsigned OpIdx)
const { return 0;}
+ uint32_t getPostIdxRegOpValue(const MachineInstr &MI, unsigned OpIdx)
+ const { return 0;}
uint32_t getAddrMode3OffsetOpValue(const MachineInstr &MI, unsigned OpIdx)
const { return 0;}
uint32_t getAddrMode3OpValue(const MachineInstr &MI, unsigned Op)
@@ -632,15 +636,16 @@ void ARMCodeEmitter::emitConstPoolInstruction(const MachineInstr &MI) {
<< (void*)MCE.getCurrentPCValue() << " " << *ACPV << '\n');
assert(ACPV->isGlobalValue() && "unsupported constant pool value");
- const GlobalValue *GV = ACPV->getGV();
+ const GlobalValue *GV = cast<ARMConstantPoolConstant>(ACPV)->getGV();
if (GV) {
Reloc::Model RelocM = TM.getRelocationModel();
emitGlobalAddress(GV, ARM::reloc_arm_machine_cp_entry,
isa<Function>(GV),
Subtarget->GVIsIndirectSymbol(GV, RelocM),
(intptr_t)ACPV);
- } else {
- emitExternalSymbolAddress(ACPV->getSymbol(), ARM::reloc_arm_absolute);
+ } else {
+ const char *Sym = cast<ARMConstantPoolSymbol>(ACPV)->getSymbol();
+ emitExternalSymbolAddress(Sym, ARM::reloc_arm_absolute);
}
emitWordLE(0);
} else {
@@ -983,7 +988,7 @@ unsigned ARMCodeEmitter::getMachineSoImmOpValue(unsigned SoImm) {
unsigned ARMCodeEmitter::getAddrModeSBit(const MachineInstr &MI,
const MCInstrDesc &MCID) const {
- for (unsigned i = MI.getNumOperands(), e = MCID.getNumOperands(); i >= e; --i){
+ for (unsigned i = MI.getNumOperands(), e = MCID.getNumOperands(); i >= e;--i){
const MachineOperand &MO = MI.getOperand(i-1);
if (MO.isReg() && MO.isDef() && MO.getReg() == ARM::CPSR)
return 1 << ARMII::S_BitShift;
diff --git a/contrib/llvm/lib/Target/ARM/ARMConstantIslandPass.cpp b/contrib/llvm/lib/Target/ARM/ARMConstantIslandPass.cpp
index f45ebdc..3e3a413 100644
--- a/contrib/llvm/lib/Target/ARM/ARMConstantIslandPass.cpp
+++ b/contrib/llvm/lib/Target/ARM/ARMConstantIslandPass.cpp
@@ -15,10 +15,10 @@
#define DEBUG_TYPE "arm-cp-islands"
#include "ARM.h"
-#include "ARMAddressingModes.h"
#include "ARMMachineFunctionInfo.h"
#include "ARMInstrInfo.h"
#include "Thumb2InstrInfo.h"
+#include "MCTargetDesc/ARMAddressingModes.h"
#include "llvm/CodeGen/MachineConstantPool.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineJumpTableInfo.h"
@@ -739,7 +739,11 @@ MachineBasicBlock *ARMConstantIslands::SplitBlockBeforeInstr(MachineInstr *MI) {
// There doesn't seem to be meaningful DebugInfo available; this doesn't
// correspond to anything in the source.
unsigned Opc = isThumb ? (isThumb2 ? ARM::t2B : ARM::tB) : ARM::B;
- BuildMI(OrigBB, DebugLoc(), TII->get(Opc)).addMBB(NewBB);
+ if (!isThumb)
+ BuildMI(OrigBB, DebugLoc(), TII->get(Opc)).addMBB(NewBB);
+ else
+ BuildMI(OrigBB, DebugLoc(), TII->get(Opc)).addMBB(NewBB)
+ .addImm(ARMCC::AL).addReg(0);
++NumSplit;
// Update the CFG. All succs of OrigBB are now succs of NewBB.
@@ -1151,7 +1155,11 @@ void ARMConstantIslands::CreateNewWater(unsigned CPUserIndex,
// targets will be exchanged, and the altered branch may be out of
// range, so the machinery has to know about it.
int UncondBr = isThumb ? ((isThumb2) ? ARM::t2B : ARM::tB) : ARM::B;
- BuildMI(UserMBB, DebugLoc(), TII->get(UncondBr)).addMBB(NewMBB);
+ if (!isThumb)
+ BuildMI(UserMBB, DebugLoc(), TII->get(UncondBr)).addMBB(NewMBB);
+ else
+ BuildMI(UserMBB, DebugLoc(), TII->get(UncondBr)).addMBB(NewMBB)
+ .addImm(ARMCC::AL).addReg(0);
unsigned MaxDisp = getUnconditionalBrDisp(UncondBr);
ImmBranches.push_back(ImmBranch(&UserMBB->back(),
MaxDisp, false, UncondBr));
@@ -1512,7 +1520,11 @@ ARMConstantIslands::FixUpConditionalBr(MachineFunction &MF, ImmBranch &Br) {
.addMBB(NextBB).addImm(CC).addReg(CCReg);
Br.MI = &MBB->back();
BBSizes[MBB->getNumber()] += TII->GetInstSizeInBytes(&MBB->back());
- BuildMI(MBB, DebugLoc(), TII->get(Br.UncondBr)).addMBB(DestBB);
+ if (isThumb)
+ BuildMI(MBB, DebugLoc(), TII->get(Br.UncondBr)).addMBB(DestBB)
+ .addImm(ARMCC::AL).addReg(0);
+ else
+ BuildMI(MBB, DebugLoc(), TII->get(Br.UncondBr)).addMBB(DestBB);
BBSizes[MBB->getNumber()] += TII->GetInstSizeInBytes(&MBB->back());
unsigned MaxDisp = getUnconditionalBrDisp(Br.UncondBr);
ImmBranches.push_back(ImmBranch(&MBB->back(), MaxDisp, false, Br.UncondBr));
@@ -1891,7 +1903,8 @@ AdjustJTTargetBlockForward(MachineBasicBlock *BB, MachineBasicBlock *JTBB)
// There doesn't seem to be meaningful DebugInfo available; this doesn't
// correspond directly to anything in the source.
assert (isThumb2 && "Adjusting for TB[BH] but not in Thumb2?");
- BuildMI(NewBB, DebugLoc(), TII->get(ARM::t2B)).addMBB(BB);
+ BuildMI(NewBB, DebugLoc(), TII->get(ARM::t2B)).addMBB(BB)
+ .addImm(ARMCC::AL).addReg(0);
// Update internal data structures to account for the newly inserted MBB.
MF.RenumberBlocks(NewBB);
diff --git a/contrib/llvm/lib/Target/ARM/ARMConstantPoolValue.cpp b/contrib/llvm/lib/Target/ARM/ARMConstantPoolValue.cpp
index 165a1d8..aadfd47 100644
--- a/contrib/llvm/lib/Target/ARM/ARMConstantPoolValue.cpp
+++ b/contrib/llvm/lib/Target/ARM/ARMConstantPoolValue.cpp
@@ -17,79 +17,57 @@
#include "llvm/Constants.h"
#include "llvm/GlobalValue.h"
#include "llvm/Type.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/Support/raw_ostream.h"
#include <cstdlib>
using namespace llvm;
-ARMConstantPoolValue::ARMConstantPoolValue(const Constant *cval, unsigned id,
- ARMCP::ARMCPKind K,
+//===----------------------------------------------------------------------===//
+// ARMConstantPoolValue
+//===----------------------------------------------------------------------===//
+
+ARMConstantPoolValue::ARMConstantPoolValue(Type *Ty, unsigned id,
+ ARMCP::ARMCPKind kind,
unsigned char PCAdj,
- ARMCP::ARMCPModifier Modif,
- bool AddCA)
- : MachineConstantPoolValue((const Type*)cval->getType()),
- CVal(cval), S(NULL), LabelId(id), Kind(K), PCAdjust(PCAdj),
- Modifier(Modif), AddCurrentAddress(AddCA) {}
-
-ARMConstantPoolValue::ARMConstantPoolValue(LLVMContext &C,
- const char *s, unsigned id,
+ ARMCP::ARMCPModifier modifier,
+ bool addCurrentAddress)
+ : MachineConstantPoolValue(Ty), LabelId(id), Kind(kind),
+ PCAdjust(PCAdj), Modifier(modifier),
+ AddCurrentAddress(addCurrentAddress) {}
+
+ARMConstantPoolValue::ARMConstantPoolValue(LLVMContext &C, unsigned id,
+ ARMCP::ARMCPKind kind,
unsigned char PCAdj,
- ARMCP::ARMCPModifier Modif,
- bool AddCA)
- : MachineConstantPoolValue((const Type*)Type::getInt32Ty(C)),
- CVal(NULL), S(strdup(s)), LabelId(id), Kind(ARMCP::CPExtSymbol),
- PCAdjust(PCAdj), Modifier(Modif), AddCurrentAddress(AddCA) {}
-
-ARMConstantPoolValue::ARMConstantPoolValue(const GlobalValue *gv,
- ARMCP::ARMCPModifier Modif)
- : MachineConstantPoolValue((const Type*)Type::getInt32Ty(gv->getContext())),
- CVal(gv), S(NULL), LabelId(0), Kind(ARMCP::CPValue), PCAdjust(0),
- Modifier(Modif), AddCurrentAddress(false) {}
-
-const GlobalValue *ARMConstantPoolValue::getGV() const {
- return dyn_cast_or_null<GlobalValue>(CVal);
-}
+ ARMCP::ARMCPModifier modifier,
+ bool addCurrentAddress)
+ : MachineConstantPoolValue((Type*)Type::getInt32Ty(C)),
+ LabelId(id), Kind(kind), PCAdjust(PCAdj), Modifier(modifier),
+ AddCurrentAddress(addCurrentAddress) {}
-const BlockAddress *ARMConstantPoolValue::getBlockAddress() const {
- return dyn_cast_or_null<BlockAddress>(CVal);
-}
+ARMConstantPoolValue::~ARMConstantPoolValue() {}
-static bool CPV_streq(const char *S1, const char *S2) {
- if (S1 == S2)
- return true;
- if (S1 && S2 && strcmp(S1, S2) == 0)
- return true;
- return false;
+const char *ARMConstantPoolValue::getModifierText() const {
+ switch (Modifier) {
+ default: llvm_unreachable("Unknown modifier!");
+ // FIXME: Are these case sensitive? It'd be nice to lower-case all the
+ // strings if that's legal.
+ case ARMCP::no_modifier: return "none";
+ case ARMCP::TLSGD: return "tlsgd";
+ case ARMCP::GOT: return "GOT";
+ case ARMCP::GOTOFF: return "GOTOFF";
+ case ARMCP::GOTTPOFF: return "gottpoff";
+ case ARMCP::TPOFF: return "tpoff";
+ }
}
int ARMConstantPoolValue::getExistingMachineCPValue(MachineConstantPool *CP,
unsigned Alignment) {
- unsigned AlignMask = Alignment - 1;
- const std::vector<MachineConstantPoolEntry> Constants = CP->getConstants();
- for (unsigned i = 0, e = Constants.size(); i != e; ++i) {
- if (Constants[i].isMachineConstantPoolEntry() &&
- (Constants[i].getAlignment() & AlignMask) == 0) {
- ARMConstantPoolValue *CPV =
- (ARMConstantPoolValue *)Constants[i].Val.MachineCPVal;
- if (CPV->CVal == CVal &&
- CPV->LabelId == LabelId &&
- CPV->PCAdjust == PCAdjust &&
- CPV_streq(CPV->S, S) &&
- CPV->Modifier == Modifier)
- return i;
- }
- }
-
+ assert(false && "Shouldn't be calling this directly!");
return -1;
}
-ARMConstantPoolValue::~ARMConstantPoolValue() {
- free((void*)S);
-}
-
void
-ARMConstantPoolValue::AddSelectionDAGCSEId(FoldingSetNodeID &ID) {
- ID.AddPointer(CVal);
- ID.AddPointer(S);
+ARMConstantPoolValue::addSelectionDAGCSEId(FoldingSetNodeID &ID) {
ID.AddInteger(LabelId);
ID.AddInteger(PCAdjust);
}
@@ -97,9 +75,7 @@ ARMConstantPoolValue::AddSelectionDAGCSEId(FoldingSetNodeID &ID) {
bool
ARMConstantPoolValue::hasSameValue(ARMConstantPoolValue *ACPV) {
if (ACPV->Kind == Kind &&
- ACPV->CVal == CVal &&
ACPV->PCAdjust == PCAdjust &&
- CPV_streq(ACPV->S, S) &&
ACPV->Modifier == Modifier) {
if (ACPV->LabelId == LabelId)
return true;
@@ -115,12 +91,7 @@ void ARMConstantPoolValue::dump() const {
errs() << " " << *this;
}
-
void ARMConstantPoolValue::print(raw_ostream &O) const {
- if (CVal)
- O << CVal->getName();
- else
- O << S;
if (Modifier) O << "(" << getModifierText() << ")";
if (PCAdjust != 0) {
O << "-(LPC" << LabelId << "+" << (unsigned)PCAdjust;
@@ -128,3 +99,221 @@ void ARMConstantPoolValue::print(raw_ostream &O) const {
O << ")";
}
}
+
+//===----------------------------------------------------------------------===//
+// ARMConstantPoolConstant
+//===----------------------------------------------------------------------===//
+
+ARMConstantPoolConstant::ARMConstantPoolConstant(Type *Ty,
+ const Constant *C,
+ unsigned ID,
+ ARMCP::ARMCPKind Kind,
+ unsigned char PCAdj,
+ ARMCP::ARMCPModifier Modifier,
+ bool AddCurrentAddress)
+ : ARMConstantPoolValue(Ty, ID, Kind, PCAdj, Modifier, AddCurrentAddress),
+ CVal(C) {}
+
+ARMConstantPoolConstant::ARMConstantPoolConstant(const Constant *C,
+ unsigned ID,
+ ARMCP::ARMCPKind Kind,
+ unsigned char PCAdj,
+ ARMCP::ARMCPModifier Modifier,
+ bool AddCurrentAddress)
+ : ARMConstantPoolValue((Type*)C->getType(), ID, Kind, PCAdj, Modifier,
+ AddCurrentAddress),
+ CVal(C) {}
+
+ARMConstantPoolConstant *
+ARMConstantPoolConstant::Create(const Constant *C, unsigned ID) {
+ return new ARMConstantPoolConstant(C, ID, ARMCP::CPValue, 0,
+ ARMCP::no_modifier, false);
+}
+
+ARMConstantPoolConstant *
+ARMConstantPoolConstant::Create(const GlobalValue *GV,
+ ARMCP::ARMCPModifier Modifier) {
+ return new ARMConstantPoolConstant((Type*)Type::getInt32Ty(GV->getContext()),
+ GV, 0, ARMCP::CPValue, 0,
+ Modifier, false);
+}
+
+ARMConstantPoolConstant *
+ARMConstantPoolConstant::Create(const Constant *C, unsigned ID,
+ ARMCP::ARMCPKind Kind, unsigned char PCAdj) {
+ return new ARMConstantPoolConstant(C, ID, Kind, PCAdj,
+ ARMCP::no_modifier, false);
+}
+
+ARMConstantPoolConstant *
+ARMConstantPoolConstant::Create(const Constant *C, unsigned ID,
+ ARMCP::ARMCPKind Kind, unsigned char PCAdj,
+ ARMCP::ARMCPModifier Modifier,
+ bool AddCurrentAddress) {
+ return new ARMConstantPoolConstant(C, ID, Kind, PCAdj, Modifier,
+ AddCurrentAddress);
+}
+
+const GlobalValue *ARMConstantPoolConstant::getGV() const {
+ return dyn_cast_or_null<GlobalValue>(CVal);
+}
+
+const BlockAddress *ARMConstantPoolConstant::getBlockAddress() const {
+ return dyn_cast_or_null<BlockAddress>(CVal);
+}
+
+int ARMConstantPoolConstant::getExistingMachineCPValue(MachineConstantPool *CP,
+ unsigned Alignment) {
+ unsigned AlignMask = Alignment - 1;
+ const std::vector<MachineConstantPoolEntry> Constants = CP->getConstants();
+ for (unsigned i = 0, e = Constants.size(); i != e; ++i) {
+ if (Constants[i].isMachineConstantPoolEntry() &&
+ (Constants[i].getAlignment() & AlignMask) == 0) {
+ ARMConstantPoolValue *CPV =
+ (ARMConstantPoolValue *)Constants[i].Val.MachineCPVal;
+ ARMConstantPoolConstant *APC = dyn_cast<ARMConstantPoolConstant>(CPV);
+ if (!APC) continue;
+ if (APC->CVal == CVal && equals(APC))
+ return i;
+ }
+ }
+
+ return -1;
+}
+
+bool ARMConstantPoolConstant::hasSameValue(ARMConstantPoolValue *ACPV) {
+ const ARMConstantPoolConstant *ACPC = dyn_cast<ARMConstantPoolConstant>(ACPV);
+ return ACPC && ACPC->CVal == CVal && ARMConstantPoolValue::hasSameValue(ACPV);
+}
+
+void ARMConstantPoolConstant::addSelectionDAGCSEId(FoldingSetNodeID &ID) {
+ ID.AddPointer(CVal);
+ ARMConstantPoolValue::addSelectionDAGCSEId(ID);
+}
+
+void ARMConstantPoolConstant::print(raw_ostream &O) const {
+ O << CVal->getName();
+ ARMConstantPoolValue::print(O);
+}
+
+//===----------------------------------------------------------------------===//
+// ARMConstantPoolSymbol
+//===----------------------------------------------------------------------===//
+
+ARMConstantPoolSymbol::ARMConstantPoolSymbol(LLVMContext &C, const char *s,
+ unsigned id,
+ unsigned char PCAdj,
+ ARMCP::ARMCPModifier Modifier,
+ bool AddCurrentAddress)
+ : ARMConstantPoolValue(C, id, ARMCP::CPExtSymbol, PCAdj, Modifier,
+ AddCurrentAddress),
+ S(strdup(s)) {}
+
+ARMConstantPoolSymbol::~ARMConstantPoolSymbol() {
+ free((void*)S);
+}
+
+ARMConstantPoolSymbol *
+ARMConstantPoolSymbol::Create(LLVMContext &C, const char *s,
+ unsigned ID, unsigned char PCAdj) {
+ return new ARMConstantPoolSymbol(C, s, ID, PCAdj, ARMCP::no_modifier, false);
+}
+
+static bool CPV_streq(const char *S1, const char *S2) {
+ if (S1 == S2)
+ return true;
+ if (S1 && S2 && strcmp(S1, S2) == 0)
+ return true;
+ return false;
+}
+
+int ARMConstantPoolSymbol::getExistingMachineCPValue(MachineConstantPool *CP,
+ unsigned Alignment) {
+ unsigned AlignMask = Alignment - 1;
+ const std::vector<MachineConstantPoolEntry> Constants = CP->getConstants();
+ for (unsigned i = 0, e = Constants.size(); i != e; ++i) {
+ if (Constants[i].isMachineConstantPoolEntry() &&
+ (Constants[i].getAlignment() & AlignMask) == 0) {
+ ARMConstantPoolValue *CPV =
+ (ARMConstantPoolValue *)Constants[i].Val.MachineCPVal;
+ ARMConstantPoolSymbol *APS = dyn_cast<ARMConstantPoolSymbol>(CPV);
+ if (!APS) continue;
+
+ if (CPV_streq(APS->S, S) && equals(APS))
+ return i;
+ }
+ }
+
+ return -1;
+}
+
+bool ARMConstantPoolSymbol::hasSameValue(ARMConstantPoolValue *ACPV) {
+ const ARMConstantPoolSymbol *ACPS = dyn_cast<ARMConstantPoolSymbol>(ACPV);
+ return ACPS && CPV_streq(ACPS->S, S) &&
+ ARMConstantPoolValue::hasSameValue(ACPV);
+}
+
+void ARMConstantPoolSymbol::addSelectionDAGCSEId(FoldingSetNodeID &ID) {
+ ID.AddPointer(S);
+ ARMConstantPoolValue::addSelectionDAGCSEId(ID);
+}
+
+void ARMConstantPoolSymbol::print(raw_ostream &O) const {
+ O << S;
+ ARMConstantPoolValue::print(O);
+}
+
+//===----------------------------------------------------------------------===//
+// ARMConstantPoolMBB
+//===----------------------------------------------------------------------===//
+
+ARMConstantPoolMBB::ARMConstantPoolMBB(LLVMContext &C,
+ const MachineBasicBlock *mbb,
+ unsigned id, unsigned char PCAdj,
+ ARMCP::ARMCPModifier Modifier,
+ bool AddCurrentAddress)
+ : ARMConstantPoolValue(C, id, ARMCP::CPMachineBasicBlock, PCAdj,
+ Modifier, AddCurrentAddress),
+ MBB(mbb) {}
+
+ARMConstantPoolMBB *ARMConstantPoolMBB::Create(LLVMContext &C,
+ const MachineBasicBlock *mbb,
+ unsigned ID,
+ unsigned char PCAdj) {
+ return new ARMConstantPoolMBB(C, mbb, ID, PCAdj, ARMCP::no_modifier, false);
+}
+
+int ARMConstantPoolMBB::getExistingMachineCPValue(MachineConstantPool *CP,
+ unsigned Alignment) {
+ unsigned AlignMask = Alignment - 1;
+ const std::vector<MachineConstantPoolEntry> Constants = CP->getConstants();
+ for (unsigned i = 0, e = Constants.size(); i != e; ++i) {
+ if (Constants[i].isMachineConstantPoolEntry() &&
+ (Constants[i].getAlignment() & AlignMask) == 0) {
+ ARMConstantPoolValue *CPV =
+ (ARMConstantPoolValue *)Constants[i].Val.MachineCPVal;
+ ARMConstantPoolMBB *APMBB = dyn_cast<ARMConstantPoolMBB>(CPV);
+ if (!APMBB) continue;
+
+ if (APMBB->MBB == MBB && equals(APMBB))
+ return i;
+ }
+ }
+
+ return -1;
+}
+
+bool ARMConstantPoolMBB::hasSameValue(ARMConstantPoolValue *ACPV) {
+ const ARMConstantPoolMBB *ACPMBB = dyn_cast<ARMConstantPoolMBB>(ACPV);
+ return ACPMBB && ACPMBB->MBB == MBB &&
+ ARMConstantPoolValue::hasSameValue(ACPV);
+}
+
+void ARMConstantPoolMBB::addSelectionDAGCSEId(FoldingSetNodeID &ID) {
+ ID.AddPointer(MBB);
+ ARMConstantPoolValue::addSelectionDAGCSEId(ID);
+}
+
+void ARMConstantPoolMBB::print(raw_ostream &O) const {
+ ARMConstantPoolValue::print(O);
+}
diff --git a/contrib/llvm/lib/Target/ARM/ARMConstantPoolValue.h b/contrib/llvm/lib/Target/ARM/ARMConstantPoolValue.h
index d008811..0d0def3 100644
--- a/contrib/llvm/lib/Target/ARM/ARMConstantPoolValue.h
+++ b/contrib/llvm/lib/Target/ARM/ARMConstantPoolValue.h
@@ -20,17 +20,19 @@
namespace llvm {
-class Constant;
class BlockAddress;
+class Constant;
class GlobalValue;
class LLVMContext;
+class MachineBasicBlock;
namespace ARMCP {
enum ARMCPKind {
CPValue,
CPExtSymbol,
CPBlockAddress,
- CPLSDA
+ CPLSDA,
+ CPMachineBasicBlock
};
enum ARMCPModifier {
@@ -47,8 +49,6 @@ namespace ARMCP {
/// represent PC-relative displacement between the address of the load
/// instruction and the constant being loaded, i.e. (&GV-(LPIC+8)).
class ARMConstantPoolValue : public MachineConstantPoolValue {
- const Constant *CVal; // Constant being loaded.
- const char *S; // ExtSymbol being loaded.
unsigned LabelId; // Label id of the load.
ARMCP::ARMCPKind Kind; // Kind of constant.
unsigned char PCAdjust; // Extra adjustment if constantpool is pc-relative.
@@ -56,60 +56,54 @@ class ARMConstantPoolValue : public MachineConstantPoolValue {
ARMCP::ARMCPModifier Modifier; // GV modifier i.e. (&GV(modifier)-(LPIC+8))
bool AddCurrentAddress;
+protected:
+ ARMConstantPoolValue(Type *Ty, unsigned id, ARMCP::ARMCPKind Kind,
+ unsigned char PCAdj, ARMCP::ARMCPModifier Modifier,
+ bool AddCurrentAddress);
+
+ ARMConstantPoolValue(LLVMContext &C, unsigned id, ARMCP::ARMCPKind Kind,
+ unsigned char PCAdj, ARMCP::ARMCPModifier Modifier,
+ bool AddCurrentAddress);
public:
- ARMConstantPoolValue(const Constant *cval, unsigned id,
- ARMCP::ARMCPKind Kind = ARMCP::CPValue,
- unsigned char PCAdj = 0,
- ARMCP::ARMCPModifier Modifier = ARMCP::no_modifier,
- bool AddCurrentAddress = false);
- ARMConstantPoolValue(LLVMContext &C, const char *s, unsigned id,
- unsigned char PCAdj = 0,
- ARMCP::ARMCPModifier Modifier = ARMCP::no_modifier,
- bool AddCurrentAddress = false);
- ARMConstantPoolValue(const GlobalValue *GV, ARMCP::ARMCPModifier Modifier);
- ARMConstantPoolValue();
- ~ARMConstantPoolValue();
+ virtual ~ARMConstantPoolValue();
- const GlobalValue *getGV() const;
- const char *getSymbol() const { return S; }
- const BlockAddress *getBlockAddress() const;
ARMCP::ARMCPModifier getModifier() const { return Modifier; }
- const char *getModifierText() const {
- switch (Modifier) {
- default: llvm_unreachable("Unknown modifier!");
- // FIXME: Are these case sensitive? It'd be nice to lower-case all the
- // strings if that's legal.
- case ARMCP::no_modifier: return "none";
- case ARMCP::TLSGD: return "tlsgd";
- case ARMCP::GOT: return "GOT";
- case ARMCP::GOTOFF: return "GOTOFF";
- case ARMCP::GOTTPOFF: return "gottpoff";
- case ARMCP::TPOFF: return "tpoff";
- }
- }
+ const char *getModifierText() const;
bool hasModifier() const { return Modifier != ARMCP::no_modifier; }
+
bool mustAddCurrentAddress() const { return AddCurrentAddress; }
+
unsigned getLabelId() const { return LabelId; }
unsigned char getPCAdjustment() const { return PCAdjust; }
+
bool isGlobalValue() const { return Kind == ARMCP::CPValue; }
bool isExtSymbol() const { return Kind == ARMCP::CPExtSymbol; }
- bool isBlockAddress() { return Kind == ARMCP::CPBlockAddress; }
- bool isLSDA() { return Kind == ARMCP::CPLSDA; }
+ bool isBlockAddress() const { return Kind == ARMCP::CPBlockAddress; }
+ bool isLSDA() const { return Kind == ARMCP::CPLSDA; }
+ bool isMachineBasicBlock() const{ return Kind == ARMCP::CPMachineBasicBlock; }
virtual unsigned getRelocationInfo() const { return 2; }
virtual int getExistingMachineCPValue(MachineConstantPool *CP,
unsigned Alignment);
- virtual void AddSelectionDAGCSEId(FoldingSetNodeID &ID);
+ virtual void addSelectionDAGCSEId(FoldingSetNodeID &ID);
- /// hasSameValue - Return true if this ARM constpool value
- /// can share the same constantpool entry as another ARM constpool value.
- bool hasSameValue(ARMConstantPoolValue *ACPV);
+ /// hasSameValue - Return true if this ARM constpool value can share the same
+ /// constantpool entry as another ARM constpool value.
+ virtual bool hasSameValue(ARMConstantPoolValue *ACPV);
+
+ bool equals(const ARMConstantPoolValue *A) const {
+ return this->LabelId == A->LabelId &&
+ this->PCAdjust == A->PCAdjust &&
+ this->Modifier == A->Modifier;
+ }
+ virtual void print(raw_ostream &O) const;
void print(raw_ostream *O) const { if (O) print(*O); }
- void print(raw_ostream &O) const;
void dump() const;
+
+ static bool classof(const ARMConstantPoolValue *) { return true; }
};
inline raw_ostream &operator<<(raw_ostream &O, const ARMConstantPoolValue &V) {
@@ -117,6 +111,123 @@ inline raw_ostream &operator<<(raw_ostream &O, const ARMConstantPoolValue &V) {
return O;
}
+/// ARMConstantPoolConstant - ARM-specific constant pool values for Constants,
+/// Functions, and BlockAddresses.
+class ARMConstantPoolConstant : public ARMConstantPoolValue {
+ const Constant *CVal; // Constant being loaded.
+
+ ARMConstantPoolConstant(const Constant *C,
+ unsigned ID,
+ ARMCP::ARMCPKind Kind,
+ unsigned char PCAdj,
+ ARMCP::ARMCPModifier Modifier,
+ bool AddCurrentAddress);
+ ARMConstantPoolConstant(Type *Ty, const Constant *C,
+ unsigned ID,
+ ARMCP::ARMCPKind Kind,
+ unsigned char PCAdj,
+ ARMCP::ARMCPModifier Modifier,
+ bool AddCurrentAddress);
+
+public:
+ static ARMConstantPoolConstant *Create(const Constant *C, unsigned ID);
+ static ARMConstantPoolConstant *Create(const GlobalValue *GV,
+ ARMCP::ARMCPModifier Modifier);
+ static ARMConstantPoolConstant *Create(const Constant *C, unsigned ID,
+ ARMCP::ARMCPKind Kind,
+ unsigned char PCAdj);
+ static ARMConstantPoolConstant *Create(const Constant *C, unsigned ID,
+ ARMCP::ARMCPKind Kind,
+ unsigned char PCAdj,
+ ARMCP::ARMCPModifier Modifier,
+ bool AddCurrentAddress);
+
+ const GlobalValue *getGV() const;
+ const BlockAddress *getBlockAddress() const;
+
+ virtual int getExistingMachineCPValue(MachineConstantPool *CP,
+ unsigned Alignment);
+
+ /// hasSameValue - Return true if this ARM constpool value can share the same
+ /// constantpool entry as another ARM constpool value.
+ virtual bool hasSameValue(ARMConstantPoolValue *ACPV);
+
+ virtual void addSelectionDAGCSEId(FoldingSetNodeID &ID);
+
+ virtual void print(raw_ostream &O) const;
+ static bool classof(const ARMConstantPoolValue *APV) {
+ return APV->isGlobalValue() || APV->isBlockAddress() || APV->isLSDA();
+ }
+ static bool classof(const ARMConstantPoolConstant *) { return true; }
+};
+
+/// ARMConstantPoolSymbol - ARM-specific constantpool values for external
+/// symbols.
+class ARMConstantPoolSymbol : public ARMConstantPoolValue {
+ const char *S; // ExtSymbol being loaded.
+
+ ARMConstantPoolSymbol(LLVMContext &C, const char *s, unsigned id,
+ unsigned char PCAdj, ARMCP::ARMCPModifier Modifier,
+ bool AddCurrentAddress);
+
+public:
+ ~ARMConstantPoolSymbol();
+
+ static ARMConstantPoolSymbol *Create(LLVMContext &C, const char *s,
+ unsigned ID, unsigned char PCAdj);
+
+ const char *getSymbol() const { return S; }
+
+ virtual int getExistingMachineCPValue(MachineConstantPool *CP,
+ unsigned Alignment);
+
+ virtual void addSelectionDAGCSEId(FoldingSetNodeID &ID);
+
+ /// hasSameValue - Return true if this ARM constpool value can share the same
+ /// constantpool entry as another ARM constpool value.
+ virtual bool hasSameValue(ARMConstantPoolValue *ACPV);
+
+ virtual void print(raw_ostream &O) const;
+
+ static bool classof(const ARMConstantPoolValue *ACPV) {
+ return ACPV->isExtSymbol();
+ }
+ static bool classof(const ARMConstantPoolSymbol *) { return true; }
+};
+
+/// ARMConstantPoolMBB - ARM-specific constantpool value of a machine basic
+/// block.
+class ARMConstantPoolMBB : public ARMConstantPoolValue {
+ const MachineBasicBlock *MBB; // Machine basic block.
+
+ ARMConstantPoolMBB(LLVMContext &C, const MachineBasicBlock *mbb, unsigned id,
+ unsigned char PCAdj, ARMCP::ARMCPModifier Modifier,
+ bool AddCurrentAddress);
+
+public:
+ static ARMConstantPoolMBB *Create(LLVMContext &C,
+ const MachineBasicBlock *mbb,
+ unsigned ID, unsigned char PCAdj);
+
+ const MachineBasicBlock *getMBB() const { return MBB; }
+
+ virtual int getExistingMachineCPValue(MachineConstantPool *CP,
+ unsigned Alignment);
+
+ virtual void addSelectionDAGCSEId(FoldingSetNodeID &ID);
+
+ /// hasSameValue - Return true if this ARM constpool value can share the same
+ /// constantpool entry as another ARM constpool value.
+ virtual bool hasSameValue(ARMConstantPoolValue *ACPV);
+
+ virtual void print(raw_ostream &O) const;
+
+ static bool classof(const ARMConstantPoolValue *ACPV) {
+ return ACPV->isMachineBasicBlock();
+ }
+ static bool classof(const ARMConstantPoolMBB *) { return true; }
+};
+
} // End llvm namespace
#endif
diff --git a/contrib/llvm/lib/Target/ARM/ARMExpandPseudoInsts.cpp b/contrib/llvm/lib/Target/ARM/ARMExpandPseudoInsts.cpp
index 94b72fd..7872cb9 100644
--- a/contrib/llvm/lib/Target/ARM/ARMExpandPseudoInsts.cpp
+++ b/contrib/llvm/lib/Target/ARM/ARMExpandPseudoInsts.cpp
@@ -16,19 +16,24 @@
#define DEBUG_TYPE "arm-pseudo"
#include "ARM.h"
-#include "ARMAddressingModes.h"
#include "ARMBaseInstrInfo.h"
#include "ARMBaseRegisterInfo.h"
#include "ARMMachineFunctionInfo.h"
#include "ARMRegisterInfo.h"
+#include "MCTargetDesc/ARMAddressingModes.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/Target/TargetFrameLowering.h"
#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/Support/CommandLine.h"
#include "llvm/Support/raw_ostream.h" // FIXME: for debug only. remove!
using namespace llvm;
+static cl::opt<bool>
+VerifyARMPseudo("verify-arm-pseudo-expand", cl::Hidden,
+ cl::desc("Verify machine code after expanding ARM pseudos"));
+
namespace {
class ARMExpandPseudo : public MachineFunctionPass {
public:
@@ -741,8 +746,22 @@ bool ARMExpandPseudo::ExpandMI(MachineBasicBlock &MBB,
MI.eraseFromParent();
return true;
}
- case ARM::MOVCCs: {
- BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(ARM::MOVs),
+ case ARM::MOVCCsi: {
+ BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(ARM::MOVsi),
+ (MI.getOperand(1).getReg()))
+ .addReg(MI.getOperand(2).getReg(),
+ getKillRegState(MI.getOperand(2).isKill()))
+ .addImm(MI.getOperand(3).getImm())
+ .addImm(MI.getOperand(4).getImm()) // 'pred'
+ .addReg(MI.getOperand(5).getReg())
+ .addReg(0); // 's' bit
+
+ MI.eraseFromParent();
+ return true;
+ }
+
+ case ARM::MOVCCsr: {
+ BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(ARM::MOVsr),
(MI.getOperand(1).getReg()))
.addReg(MI.getOperand(2).getReg(),
getKillRegState(MI.getOperand(2).isKill()))
@@ -837,10 +856,9 @@ bool ARMExpandPseudo::ExpandMI(MachineBasicBlock &MBB,
case ARM::MOVsrl_flag:
case ARM::MOVsra_flag: {
// These are just fancy MOVs insructions.
- AddDefaultPred(BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(ARM::MOVs),
+ AddDefaultPred(BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(ARM::MOVsi),
MI.getOperand(0).getReg())
.addOperand(MI.getOperand(1))
- .addReg(0)
.addImm(ARM_AM::getSORegOpc((Opcode == ARM::MOVsrl_flag ?
ARM_AM::lsr : ARM_AM::asr),
1)))
@@ -851,10 +869,9 @@ bool ARMExpandPseudo::ExpandMI(MachineBasicBlock &MBB,
case ARM::RRX: {
// This encodes as "MOVs Rd, Rm, rrx
MachineInstrBuilder MIB =
- AddDefaultPred(BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(ARM::MOVs),
+ AddDefaultPred(BuildMI(MBB, MBBI, MI.getDebugLoc(),TII->get(ARM::MOVsi),
MI.getOperand(0).getReg())
.addOperand(MI.getOperand(1))
- .addOperand(MI.getOperand(1))
.addImm(ARM_AM::getSORegOpc(ARM_AM::rrx, 0)))
.addReg(0);
TransferImpOps(MI, MIB, MIB);
@@ -953,34 +970,6 @@ bool ARMExpandPseudo::ExpandMI(MachineBasicBlock &MBB,
ExpandMOV32BitImm(MBB, MBBI);
return true;
- case ARM::VMOVQQ: {
- unsigned DstReg = MI.getOperand(0).getReg();
- bool DstIsDead = MI.getOperand(0).isDead();
- unsigned EvenDst = TRI->getSubReg(DstReg, ARM::qsub_0);
- unsigned OddDst = TRI->getSubReg(DstReg, ARM::qsub_1);
- unsigned SrcReg = MI.getOperand(1).getReg();
- bool SrcIsKill = MI.getOperand(1).isKill();
- unsigned EvenSrc = TRI->getSubReg(SrcReg, ARM::qsub_0);
- unsigned OddSrc = TRI->getSubReg(SrcReg, ARM::qsub_1);
- MachineInstrBuilder Even =
- AddDefaultPred(BuildMI(MBB, MBBI, MI.getDebugLoc(),
- TII->get(ARM::VORRq))
- .addReg(EvenDst,
- RegState::Define | getDeadRegState(DstIsDead))
- .addReg(EvenSrc, getKillRegState(SrcIsKill))
- .addReg(EvenSrc, getKillRegState(SrcIsKill)));
- MachineInstrBuilder Odd =
- AddDefaultPred(BuildMI(MBB, MBBI, MI.getDebugLoc(),
- TII->get(ARM::VORRq))
- .addReg(OddDst,
- RegState::Define | getDeadRegState(DstIsDead))
- .addReg(OddSrc, getKillRegState(SrcIsKill))
- .addReg(OddSrc, getKillRegState(SrcIsKill)));
- TransferImpOps(MI, Even, Odd);
- MI.eraseFromParent();
- return true;
- }
-
case ARM::VLDMQIA: {
unsigned NewOpc = ARM::VLDMDIA;
MachineInstrBuilder MIB =
@@ -1316,6 +1305,8 @@ bool ARMExpandPseudo::runOnMachineFunction(MachineFunction &MF) {
for (MachineFunction::iterator MFI = MF.begin(), E = MF.end(); MFI != E;
++MFI)
Modified |= ExpandMBB(*MFI);
+ if (VerifyARMPseudo)
+ MF.verify(this, "After expanding ARM pseudo instructions.");
return Modified;
}
diff --git a/contrib/llvm/lib/Target/ARM/ARMFastISel.cpp b/contrib/llvm/lib/Target/ARM/ARMFastISel.cpp
index f469d7e..9bc7ef2 100644
--- a/contrib/llvm/lib/Target/ARM/ARMFastISel.cpp
+++ b/contrib/llvm/lib/Target/ARM/ARMFastISel.cpp
@@ -14,13 +14,13 @@
//===----------------------------------------------------------------------===//
#include "ARM.h"
-#include "ARMAddressingModes.h"
#include "ARMBaseInstrInfo.h"
#include "ARMCallingConv.h"
#include "ARMRegisterInfo.h"
#include "ARMTargetMachine.h"
#include "ARMSubtarget.h"
#include "ARMConstantPoolValue.h"
+#include "MCTargetDesc/ARMAddressingModes.h"
#include "llvm/CallingConv.h"
#include "llvm/DerivedTypes.h"
#include "llvm/GlobalVariable.h"
@@ -171,8 +171,8 @@ class ARMFastISel : public FastISel {
// Utility routines.
private:
- bool isTypeLegal(const Type *Ty, MVT &VT);
- bool isLoadTypeLegal(const Type *Ty, MVT &VT);
+ bool isTypeLegal(Type *Ty, MVT &VT);
+ bool isLoadTypeLegal(Type *Ty, MVT &VT);
bool ARMEmitLoad(EVT VT, unsigned &ResultReg, Address &Addr);
bool ARMEmitStore(EVT VT, unsigned SrcReg, Address &Addr);
bool ARMComputeAddress(const Value *Obj, Address &Addr);
@@ -502,11 +502,19 @@ unsigned ARMFastISel::ARMMaterializeFP(const ConstantFP *CFP, EVT VT) {
// This checks to see if we can use VFP3 instructions to materialize
// a constant, otherwise we have to go through the constant pool.
if (TLI.isFPImmLegal(Val, VT)) {
- unsigned Opc = is64bit ? ARM::FCONSTD : ARM::FCONSTS;
+ int Imm;
+ unsigned Opc;
+ if (is64bit) {
+ Imm = ARM_AM::getFP64Imm(Val);
+ Opc = ARM::FCONSTD;
+ } else {
+ Imm = ARM_AM::getFP32Imm(Val);
+ Opc = ARM::FCONSTS;
+ }
unsigned DestReg = createResultReg(TLI.getRegClassFor(VT));
AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(Opc),
DestReg)
- .addFPImm(CFP));
+ .addImm(Imm));
return DestReg;
}
@@ -590,8 +598,9 @@ unsigned ARMFastISel::ARMMaterializeGV(const GlobalValue *GV, EVT VT) {
// Grab index.
unsigned PCAdj = (RelocM != Reloc::PIC_) ? 0 : (Subtarget->isThumb() ? 4 : 8);
unsigned Id = AFI->createPICLabelUId();
- ARMConstantPoolValue *CPV = new ARMConstantPoolValue(GV, Id,
- ARMCP::CPValue, PCAdj);
+ ARMConstantPoolValue *CPV = ARMConstantPoolConstant::Create(GV, Id,
+ ARMCP::CPValue,
+ PCAdj);
unsigned Idx = MCP.getConstantPoolIndex(CPV, Align);
// Load value.
@@ -615,8 +624,8 @@ unsigned ARMFastISel::ARMMaterializeGV(const GlobalValue *GV, EVT VT) {
if (Subtarget->GVIsIndirectSymbol(GV, RelocM)) {
unsigned NewDestReg = createResultReg(TLI.getRegClassFor(VT));
if (isThumb)
- MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(ARM::t2LDRi12),
- NewDestReg)
+ MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
+ TII.get(ARM::t2LDRi12), NewDestReg)
.addReg(DestReg)
.addImm(0);
else
@@ -673,7 +682,7 @@ unsigned ARMFastISel::TargetMaterializeAlloca(const AllocaInst *AI) {
return 0;
}
-bool ARMFastISel::isTypeLegal(const Type *Ty, MVT &VT) {
+bool ARMFastISel::isTypeLegal(Type *Ty, MVT &VT) {
EVT evt = TLI.getValueType(Ty, true);
// Only handle simple types.
@@ -685,7 +694,7 @@ bool ARMFastISel::isTypeLegal(const Type *Ty, MVT &VT) {
return TLI.isTypeLegal(VT);
}
-bool ARMFastISel::isLoadTypeLegal(const Type *Ty, MVT &VT) {
+bool ARMFastISel::isLoadTypeLegal(Type *Ty, MVT &VT) {
if (isTypeLegal(Ty, VT)) return true;
// If this is a type than can be sign or zero-extended to a basic operation
@@ -714,7 +723,7 @@ bool ARMFastISel::ARMComputeAddress(const Value *Obj, Address &Addr) {
U = C;
}
- if (const PointerType *Ty = dyn_cast<PointerType>(Obj->getType()))
+ if (PointerType *Ty = dyn_cast<PointerType>(Obj->getType()))
if (Ty->getAddressSpace() > 255)
// Fast instruction selection doesn't support the special
// address spaces.
@@ -749,7 +758,7 @@ bool ARMFastISel::ARMComputeAddress(const Value *Obj, Address &Addr) {
for (User::const_op_iterator i = U->op_begin() + 1, e = U->op_end();
i != e; ++i, ++GTI) {
const Value *Op = *i;
- if (const StructType *STy = dyn_cast<StructType>(*GTI)) {
+ if (StructType *STy = dyn_cast<StructType>(*GTI)) {
const StructLayout *SL = TD.getStructLayout(STy);
unsigned Idx = cast<ConstantInt>(Op)->getZExtValue();
TmpOffset += SL->getElementOffset(Idx);
@@ -946,6 +955,10 @@ bool ARMFastISel::ARMEmitLoad(EVT VT, unsigned &ResultReg, Address &Addr) {
}
bool ARMFastISel::SelectLoad(const Instruction *I) {
+ // Atomic loads need special handling.
+ if (cast<LoadInst>(I)->isAtomic())
+ return false;
+
// Verify we have a legal type before going any further.
MVT VT;
if (!isLoadTypeLegal(I->getType(), VT))
@@ -1008,6 +1021,10 @@ bool ARMFastISel::SelectStore(const Instruction *I) {
Value *Op0 = I->getOperand(0);
unsigned SrcReg = 0;
+ // Atomic stores need special handling.
+ if (cast<StoreInst>(I)->isAtomic())
+ return false;
+
// Verify we have a legal type before going any further.
MVT VT;
if (!isLoadTypeLegal(I->getOperand(0)->getType(), VT))
@@ -1085,7 +1102,7 @@ bool ARMFastISel::SelectBranch(const Instruction *I) {
// TODO: Factor this out.
if (const CmpInst *CI = dyn_cast<CmpInst>(BI->getCondition())) {
MVT SourceVT;
- const Type *Ty = CI->getOperand(0)->getType();
+ Type *Ty = CI->getOperand(0)->getType();
if (CI->hasOneUse() && (CI->getParent() == I->getParent())
&& isTypeLegal(Ty, SourceVT)) {
bool isFloat = (Ty->isDoubleTy() || Ty->isFloatTy());
@@ -1201,7 +1218,7 @@ bool ARMFastISel::SelectCmp(const Instruction *I) {
const CmpInst *CI = cast<CmpInst>(I);
MVT VT;
- const Type *Ty = CI->getOperand(0)->getType();
+ Type *Ty = CI->getOperand(0)->getType();
if (!isTypeLegal(Ty, VT))
return false;
@@ -1309,7 +1326,7 @@ bool ARMFastISel::SelectSIToFP(const Instruction *I) {
if (!Subtarget->hasVFP2()) return false;
MVT DstVT;
- const Type *Ty = I->getType();
+ Type *Ty = I->getType();
if (!isTypeLegal(Ty, DstVT))
return false;
@@ -1328,7 +1345,7 @@ bool ARMFastISel::SelectSIToFP(const Instruction *I) {
unsigned Opc;
if (Ty->isFloatTy()) Opc = ARM::VSITOS;
else if (Ty->isDoubleTy()) Opc = ARM::VSITOD;
- else return 0;
+ else return false;
unsigned ResultReg = createResultReg(TLI.getRegClassFor(DstVT));
AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(Opc),
@@ -1343,7 +1360,7 @@ bool ARMFastISel::SelectFPToSI(const Instruction *I) {
if (!Subtarget->hasVFP2()) return false;
MVT DstVT;
- const Type *RetTy = I->getType();
+ Type *RetTy = I->getType();
if (!isTypeLegal(RetTy, DstVT))
return false;
@@ -1351,10 +1368,10 @@ bool ARMFastISel::SelectFPToSI(const Instruction *I) {
if (Op == 0) return false;
unsigned Opc;
- const Type *OpTy = I->getOperand(0)->getType();
+ Type *OpTy = I->getOperand(0)->getType();
if (OpTy->isFloatTy()) Opc = ARM::VTOSIZS;
else if (OpTy->isDoubleTy()) Opc = ARM::VTOSIZD;
- else return 0;
+ else return false;
// f64->s32 or f32->s32 both need an intermediate f32 reg.
unsigned ResultReg = createResultReg(TLI.getRegClassFor(MVT::f32));
@@ -1401,7 +1418,7 @@ bool ARMFastISel::SelectSelect(const Instruction *I) {
bool ARMFastISel::SelectSDiv(const Instruction *I) {
MVT VT;
- const Type *Ty = I->getType();
+ Type *Ty = I->getType();
if (!isTypeLegal(Ty, VT))
return false;
@@ -1429,7 +1446,7 @@ bool ARMFastISel::SelectSDiv(const Instruction *I) {
bool ARMFastISel::SelectSRem(const Instruction *I) {
MVT VT;
- const Type *Ty = I->getType();
+ Type *Ty = I->getType();
if (!isTypeLegal(Ty, VT))
return false;
@@ -1456,7 +1473,7 @@ bool ARMFastISel::SelectBinaryOp(const Instruction *I, unsigned ISDOpcode) {
// operations, but can't figure out how to. Just use the vfp instructions
// if we have them.
// FIXME: It'd be nice to use NEON instructions.
- const Type *Ty = I->getType();
+ Type *Ty = I->getType();
bool isFloat = (Ty->isDoubleTy() || Ty->isFloatTy());
if (isFloat && !Subtarget->hasVFP2())
return false;
@@ -1711,7 +1728,7 @@ bool ARMFastISel::SelectRet(const Instruction *I) {
// Analyze operands of the call, assigning locations to each operand.
SmallVector<CCValAssign, 16> ValLocs;
- CCState CCInfo(CC, F.isVarArg(), *FuncInfo.MF, TM, ValLocs, I->getContext());
+ CCState CCInfo(CC, F.isVarArg(), *FuncInfo.MF, TM, ValLocs,I->getContext());
CCInfo.AnalyzeReturn(Outs, CCAssignFnForCall(CC, true /* is Ret */));
const Value *RV = Ret->getOperand(0);
@@ -1778,7 +1795,7 @@ bool ARMFastISel::ARMEmitLibcall(const Instruction *I, RTLIB::Libcall Call) {
CallingConv::ID CC = TLI.getLibcallCallingConv(Call);
// Handle *simple* calls for now.
- const Type *RetTy = I->getType();
+ Type *RetTy = I->getType();
MVT RetVT;
if (RetTy->isVoidTy())
RetVT = MVT::isVoid;
@@ -1802,7 +1819,7 @@ bool ARMFastISel::ARMEmitLibcall(const Instruction *I, RTLIB::Libcall Call) {
unsigned Arg = getRegForValue(Op);
if (Arg == 0) return false;
- const Type *ArgTy = Op->getType();
+ Type *ArgTy = Op->getType();
MVT ArgVT;
if (!isTypeLegal(ArgTy, ArgVT)) return false;
@@ -1870,13 +1887,13 @@ bool ARMFastISel::SelectCall(const Instruction *I) {
// TODO: Avoid some calling conventions?
// Let SDISel handle vararg functions.
- const PointerType *PT = cast<PointerType>(CS.getCalledValue()->getType());
- const FunctionType *FTy = cast<FunctionType>(PT->getElementType());
+ PointerType *PT = cast<PointerType>(CS.getCalledValue()->getType());
+ FunctionType *FTy = cast<FunctionType>(PT->getElementType());
if (FTy->isVarArg())
return false;
// Handle *simple* calls for now.
- const Type *RetTy = I->getType();
+ Type *RetTy = I->getType();
MVT RetVT;
if (RetTy->isVoidTy())
RetVT = MVT::isVoid;
@@ -1915,7 +1932,7 @@ bool ARMFastISel::SelectCall(const Instruction *I) {
CS.paramHasAttr(AttrInd, Attribute::ByVal))
return false;
- const Type *ArgTy = (*i)->getType();
+ Type *ArgTy = (*i)->getType();
MVT ArgVT;
if (!isTypeLegal(ArgTy, ArgVT))
return false;
@@ -1969,9 +1986,9 @@ bool ARMFastISel::SelectIntCast(const Instruction *I) {
// On ARM, in general, integer casts don't involve legal types; this code
// handles promotable integers. The high bits for a type smaller than
// the register size are assumed to be undefined.
- const Type *DestTy = I->getType();
+ Type *DestTy = I->getType();
Value *Op = I->getOperand(0);
- const Type *SrcTy = Op->getType();
+ Type *SrcTy = Op->getType();
EVT SrcVT, DestVT;
SrcVT = TLI.getValueType(SrcTy, true);
@@ -2002,16 +2019,18 @@ bool ARMFastISel::SelectIntCast(const Instruction *I) {
switch (SrcVT.getSimpleVT().SimpleTy) {
default: return false;
case MVT::i16:
+ if (!Subtarget->hasV6Ops()) return false;
if (isZext)
- Opc = isThumb ? ARM::t2UXTHr : ARM::UXTHr;
+ Opc = isThumb ? ARM::t2UXTH : ARM::UXTH;
else
- Opc = isThumb ? ARM::t2SXTHr : ARM::SXTHr;
+ Opc = isThumb ? ARM::t2SXTH : ARM::SXTH;
break;
case MVT::i8:
+ if (!Subtarget->hasV6Ops()) return false;
if (isZext)
- Opc = isThumb ? ARM::t2UXTBr : ARM::UXTBr;
+ Opc = isThumb ? ARM::t2UXTB : ARM::UXTB;
else
- Opc = isThumb ? ARM::t2SXTBr : ARM::SXTBr;
+ Opc = isThumb ? ARM::t2SXTB : ARM::SXTB;
break;
case MVT::i1:
if (isZext) {
@@ -2033,6 +2052,8 @@ bool ARMFastISel::SelectIntCast(const Instruction *I) {
.addReg(SrcReg);
if (isBoolZext)
MIB.addImm(1);
+ else
+ MIB.addImm(0);
AddOptionalDefs(MIB);
UpdateValueMap(I, DestReg);
return true;
diff --git a/contrib/llvm/lib/Target/ARM/ARMFrameLowering.cpp b/contrib/llvm/lib/Target/ARM/ARMFrameLowering.cpp
index 381b404..2d1de6f 100644
--- a/contrib/llvm/lib/Target/ARM/ARMFrameLowering.cpp
+++ b/contrib/llvm/lib/Target/ARM/ARMFrameLowering.cpp
@@ -12,10 +12,10 @@
//===----------------------------------------------------------------------===//
#include "ARMFrameLowering.h"
-#include "ARMAddressingModes.h"
#include "ARMBaseInstrInfo.h"
#include "ARMBaseRegisterInfo.h"
#include "ARMMachineFunctionInfo.h"
+#include "MCTargetDesc/ARMAddressingModes.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
@@ -93,7 +93,8 @@ static bool isCSRestore(MachineInstr *MI,
return false;
return true;
}
- if ((MI->getOpcode() == ARM::LDR_POST ||
+ if ((MI->getOpcode() == ARM::LDR_POST_IMM ||
+ MI->getOpcode() == ARM::LDR_POST_REG ||
MI->getOpcode() == ARM::t2LDR_POST) &&
isCalleeSavedRegister(MI->getOperand(0).getReg(), CSRegs) &&
MI->getOperand(1).getReg() == ARM::SP)
@@ -413,6 +414,9 @@ void ARMFrameLowering::emitEpilogue(MachineFunction &MF,
MIB.addExternalSymbol(JumpTarget.getSymbolName(),
JumpTarget.getTargetFlags());
}
+
+ // Add the default predicate in Thumb mode.
+ if (STI.isThumb()) MIB.addImm(ARMCC::AL).addReg(0);
} else if (RetOpcode == ARM::TCRETURNri) {
BuildMI(MBB, MBBI, dl,
TII.get(STI.isThumb() ? ARM::tTAILJMPr : ARM::TAILJMPr)).
@@ -502,7 +506,7 @@ ARMFrameLowering::ResolveFrameIndexReference(const MachineFunction &MF,
}
}
} else if (AFI->isThumb2Function()) {
- // Use add <rd>, sp, #<imm8>
+ // Use add <rd>, sp, #<imm8>
// ldr <rd>, [sp, #<imm8>]
// if at all possible to save space.
if (Offset >= 0 && (Offset & 3) == 0 && Offset <= 1020)
@@ -587,14 +591,8 @@ void ARMFrameLowering::emitPushInst(MachineBasicBlock &MBB,
MachineInstrBuilder MIB = BuildMI(MBB, MI, DL, TII.get(StrOpc),
ARM::SP)
.addReg(Regs[0].first, getKillRegState(Regs[0].second))
- .addReg(ARM::SP).setMIFlags(MIFlags);
- // ARM mode needs an extra reg0 here due to addrmode2. Will go away once
- // that refactoring is complete (eventually).
- if (StrOpc == ARM::STR_PRE) {
- MIB.addReg(0);
- MIB.addImm(ARM_AM::getAM2Opc(ARM_AM::sub, 4, ARM_AM::no_shift));
- } else
- MIB.addImm(-4);
+ .addReg(ARM::SP).setMIFlags(MIFlags)
+ .addImm(-4);
AddDefaultPred(MIB);
}
Regs.clear();
@@ -651,8 +649,10 @@ void ARMFrameLowering::emitPopInst(MachineBasicBlock &MBB,
.addReg(ARM::SP));
for (unsigned i = 0, e = Regs.size(); i < e; ++i)
MIB.addReg(Regs[i], getDefRegState(true));
- if (DeleteRet)
+ if (DeleteRet) {
+ MIB->copyImplicitOps(&*MI);
MI->eraseFromParent();
+ }
MI = MIB;
} else if (Regs.size() == 1) {
// If we adjusted the reg to PC from LR above, switch it back here. We
@@ -665,7 +665,7 @@ void ARMFrameLowering::emitPopInst(MachineBasicBlock &MBB,
.addReg(ARM::SP);
// ARM mode needs an extra reg0 here due to addrmode2. Will go away once
// that refactoring is complete (eventually).
- if (LdrOpc == ARM::LDR_POST) {
+ if (LdrOpc == ARM::LDR_POST_REG || LdrOpc == ARM::LDR_POST_IMM) {
MIB.addReg(0);
MIB.addImm(ARM_AM::getAM2Opc(ARM_AM::add, 4, ARM_AM::no_shift));
} else
@@ -687,7 +687,8 @@ bool ARMFrameLowering::spillCalleeSavedRegisters(MachineBasicBlock &MBB,
ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
unsigned PushOpc = AFI->isThumbFunction() ? ARM::t2STMDB_UPD : ARM::STMDB_UPD;
- unsigned PushOneOpc = AFI->isThumbFunction() ? ARM::t2STR_PRE : ARM::STR_PRE;
+ unsigned PushOneOpc = AFI->isThumbFunction() ?
+ ARM::t2STR_PRE : ARM::STR_PRE_IMM;
unsigned FltOpc = ARM::VSTMDDB_UPD;
emitPushInst(MBB, MI, CSI, PushOpc, PushOneOpc, false, &isARMArea1Register,
MachineInstr::FrameSetup);
@@ -711,7 +712,7 @@ bool ARMFrameLowering::restoreCalleeSavedRegisters(MachineBasicBlock &MBB,
bool isVarArg = AFI->getVarArgsRegSaveSize() > 0;
unsigned PopOpc = AFI->isThumbFunction() ? ARM::t2LDMIA_UPD : ARM::LDMIA_UPD;
- unsigned LdrOpc = AFI->isThumbFunction() ? ARM::t2LDR_POST : ARM::LDR_POST;
+ unsigned LdrOpc = AFI->isThumbFunction() ? ARM::t2LDR_POST :ARM::LDR_POST_IMM;
unsigned FltOpc = ARM::VLDMDIA_UPD;
emitPopInst(MBB, MI, CSI, FltOpc, 0, isVarArg, true, &isARMArea3Register);
emitPopInst(MBB, MI, CSI, PopOpc, LdrOpc, isVarArg, false,
diff --git a/contrib/llvm/lib/Target/ARM/ARMGlobalMerge.cpp b/contrib/llvm/lib/Target/ARM/ARMGlobalMerge.cpp
index 8d77b2d..5f863ea 100644
--- a/contrib/llvm/lib/Target/ARM/ARMGlobalMerge.cpp
+++ b/contrib/llvm/lib/Target/ARM/ARMGlobalMerge.cpp
@@ -100,8 +100,8 @@ namespace {
GlobalCmp(const TargetData *td) : TD(td) { }
bool operator()(const GlobalVariable *GV1, const GlobalVariable *GV2) {
- const Type *Ty1 = cast<PointerType>(GV1->getType())->getElementType();
- const Type *Ty2 = cast<PointerType>(GV2->getType())->getElementType();
+ Type *Ty1 = cast<PointerType>(GV1->getType())->getElementType();
+ Type *Ty2 = cast<PointerType>(GV2->getType())->getElementType();
return (TD->getTypeAllocSize(Ty1) < TD->getTypeAllocSize(Ty2));
}
@@ -123,7 +123,7 @@ bool ARMGlobalMerge::doMerge(SmallVectorImpl<GlobalVariable*> &Globals,
// FIXME: Find better heuristics
std::stable_sort(Globals.begin(), Globals.end(), GlobalCmp(TD));
- const Type *Int32Ty = Type::getInt32Ty(M.getContext());
+ Type *Int32Ty = Type::getInt32Ty(M.getContext());
for (size_t i = 0, e = Globals.size(); i != e; ) {
size_t j = 0;
@@ -150,7 +150,7 @@ bool ARMGlobalMerge::doMerge(SmallVectorImpl<GlobalVariable*> &Globals,
ConstantInt::get(Int32Ty, 0),
ConstantInt::get(Int32Ty, k-i)
};
- Constant *GEP = ConstantExpr::getInBoundsGetElementPtr(MergedGV, Idx, 2);
+ Constant *GEP = ConstantExpr::getInBoundsGetElementPtr(MergedGV, Idx);
Globals[k]->replaceAllUsesWith(GEP);
Globals[k]->eraseFromParent();
}
@@ -176,7 +176,7 @@ bool ARMGlobalMerge::doInitialization(Module &M) {
// Ignore fancy-aligned globals for now.
unsigned Alignment = I->getAlignment();
- const Type *Ty = I->getType()->getElementType();
+ Type *Ty = I->getType()->getElementType();
if (Alignment > TD->getABITypeAlignment(Ty))
continue;
diff --git a/contrib/llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp b/contrib/llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp
index 2c9481b..5ee009c 100644
--- a/contrib/llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp
+++ b/contrib/llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp
@@ -14,8 +14,8 @@
#define DEBUG_TYPE "arm-isel"
#include "ARM.h"
#include "ARMBaseInstrInfo.h"
-#include "ARMAddressingModes.h"
#include "ARMTargetMachine.h"
+#include "MCTargetDesc/ARMAddressingModes.h"
#include "llvm/CallingConv.h"
#include "llvm/Constants.h"
#include "llvm/DerivedTypes.h"
@@ -47,6 +47,11 @@ CheckVMLxHazard("check-vmlx-hazard", cl::Hidden,
cl::desc("Check fp vmla / vmls hazard at isel time"),
cl::init(true));
+static cl::opt<bool>
+DisableARMIntABS("disable-arm-int-abs", cl::Hidden,
+ cl::desc("Enable / disable ARM integer abs transform"),
+ cl::init(false));
+
//===--------------------------------------------------------------------===//
/// ARMDAGToDAGISel - ARM specific code to select ARM machine
/// instructions for SelectionDAG operations.
@@ -90,13 +95,20 @@ public:
bool hasNoVMLxHazardUse(SDNode *N) const;
bool isShifterOpProfitable(const SDValue &Shift,
ARM_AM::ShiftOpc ShOpcVal, unsigned ShAmt);
- bool SelectShifterOperandReg(SDValue N, SDValue &A,
+ bool SelectRegShifterOperand(SDValue N, SDValue &A,
SDValue &B, SDValue &C,
bool CheckProfitability = true);
- bool SelectShiftShifterOperandReg(SDValue N, SDValue &A,
+ bool SelectImmShifterOperand(SDValue N, SDValue &A,
+ SDValue &B, bool CheckProfitability = true);
+ bool SelectShiftRegShifterOperand(SDValue N, SDValue &A,
SDValue &B, SDValue &C) {
// Don't apply the profitability check
- return SelectShifterOperandReg(N, A, B, C, false);
+ return SelectRegShifterOperand(N, A, B, C, false);
+ }
+ bool SelectShiftImmShifterOperand(SDValue N, SDValue &A,
+ SDValue &B) {
+ // Don't apply the profitability check
+ return SelectImmShifterOperand(N, A, B, false);
}
bool SelectAddrModeImm12(SDValue N, SDValue &Base, SDValue &OffImm);
@@ -122,8 +134,13 @@ public:
return true;
}
- bool SelectAddrMode2Offset(SDNode *Op, SDValue N,
+ bool SelectAddrMode2OffsetReg(SDNode *Op, SDValue N,
SDValue &Offset, SDValue &Opc);
+ bool SelectAddrMode2OffsetImm(SDNode *Op, SDValue N,
+ SDValue &Offset, SDValue &Opc);
+ bool SelectAddrMode2OffsetImmPre(SDNode *Op, SDValue N,
+ SDValue &Offset, SDValue &Opc);
+ bool SelectAddrOffsetNone(SDValue N, SDValue &Base);
bool SelectAddrMode3(SDValue N, SDValue &Base,
SDValue &Offset, SDValue &Opc);
bool SelectAddrMode3Offset(SDNode *Op, SDValue N,
@@ -240,8 +257,13 @@ private:
ARMCC::CondCodes CCVal, SDValue CCR,
SDValue InFlag);
+ // Select special operations if node forms integer ABS pattern
+ SDNode *SelectABSOp(SDNode *N);
+
SDNode *SelectConcatVector(SDNode *N);
+ SDNode *SelectAtomic64(SDNode *Node, unsigned Opc);
+
/// SelectInlineAsmMemoryOperand - Implement addressing mode selection for
/// inline asm expressions.
virtual bool SelectInlineAsmMemoryOperand(const SDValue &Op,
@@ -291,10 +313,10 @@ static bool isOpcWithIntImmediate(SDNode *N, unsigned Opc, unsigned& Imm) {
/// (N * Scale) where (N in [\arg RangeMin, \arg RangeMax).
///
/// \param ScaledConstant [out] - On success, the pre-scaled constant value.
-static bool isScaledConstantInRange(SDValue Node, unsigned Scale,
+static bool isScaledConstantInRange(SDValue Node, int Scale,
int RangeMin, int RangeMax,
int &ScaledConstant) {
- assert(Scale && "Invalid scale!");
+ assert(Scale > 0 && "Invalid scale!");
// Check that this is a constant.
const ConstantSDNode *C = dyn_cast<ConstantSDNode>(Node);
@@ -365,7 +387,30 @@ bool ARMDAGToDAGISel::isShifterOpProfitable(const SDValue &Shift,
return ShOpcVal == ARM_AM::lsl && ShAmt == 2;
}
-bool ARMDAGToDAGISel::SelectShifterOperandReg(SDValue N,
+bool ARMDAGToDAGISel::SelectImmShifterOperand(SDValue N,
+ SDValue &BaseReg,
+ SDValue &Opc,
+ bool CheckProfitability) {
+ if (DisableShifterOp)
+ return false;
+
+ ARM_AM::ShiftOpc ShOpcVal = ARM_AM::getShiftOpcForNode(N.getOpcode());
+
+ // Don't match base register only case. That is matched to a separate
+ // lower complexity pattern with explicit register operand.
+ if (ShOpcVal == ARM_AM::no_shift) return false;
+
+ BaseReg = N.getOperand(0);
+ unsigned ShImmVal = 0;
+ ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1));
+ if (!RHS) return false;
+ ShImmVal = RHS->getZExtValue() & 31;
+ Opc = CurDAG->getTargetConstant(ARM_AM::getSORegOpc(ShOpcVal, ShImmVal),
+ MVT::i32);
+ return true;
+}
+
+bool ARMDAGToDAGISel::SelectRegShifterOperand(SDValue N,
SDValue &BaseReg,
SDValue &ShReg,
SDValue &Opc,
@@ -373,7 +418,7 @@ bool ARMDAGToDAGISel::SelectShifterOperandReg(SDValue N,
if (DisableShifterOp)
return false;
- ARM_AM::ShiftOpc ShOpcVal = ARM_AM::getShiftOpcForNode(N);
+ ARM_AM::ShiftOpc ShOpcVal = ARM_AM::getShiftOpcForNode(N.getOpcode());
// Don't match base register only case. That is matched to a separate
// lower complexity pattern with explicit register operand.
@@ -381,19 +426,18 @@ bool ARMDAGToDAGISel::SelectShifterOperandReg(SDValue N,
BaseReg = N.getOperand(0);
unsigned ShImmVal = 0;
- if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
- ShReg = CurDAG->getRegister(0, MVT::i32);
- ShImmVal = RHS->getZExtValue() & 31;
- } else {
- ShReg = N.getOperand(1);
- if (CheckProfitability && !isShifterOpProfitable(N, ShOpcVal, ShImmVal))
- return false;
- }
+ ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1));
+ if (RHS) return false;
+
+ ShReg = N.getOperand(1);
+ if (CheckProfitability && !isShifterOpProfitable(N, ShOpcVal, ShImmVal))
+ return false;
Opc = CurDAG->getTargetConstant(ARM_AM::getSORegOpc(ShOpcVal, ShImmVal),
MVT::i32);
return true;
}
+
bool ARMDAGToDAGISel::SelectAddrModeImm12(SDValue N,
SDValue &Base,
SDValue &OffImm) {
@@ -483,13 +527,10 @@ bool ARMDAGToDAGISel::SelectLdStSOReg(SDValue N, SDValue &Base, SDValue &Offset,
return false;
}
- if (Subtarget->isCortexA9() && !N.hasOneUse())
- // Compute R +/- (R << N) and reuse it.
- return false;
-
// Otherwise this is R +/- [possibly shifted] R.
ARM_AM::AddrOpc AddSub = N.getOpcode() == ISD::SUB ? ARM_AM::sub:ARM_AM::add;
- ARM_AM::ShiftOpc ShOpcVal = ARM_AM::getShiftOpcForNode(N.getOperand(1));
+ ARM_AM::ShiftOpc ShOpcVal =
+ ARM_AM::getShiftOpcForNode(N.getOperand(1).getOpcode());
unsigned ShAmt = 0;
Base = N.getOperand(0);
@@ -515,16 +556,14 @@ bool ARMDAGToDAGISel::SelectLdStSOReg(SDValue N, SDValue &Base, SDValue &Offset,
// Try matching (R shl C) + (R).
if (N.getOpcode() != ISD::SUB && ShOpcVal == ARM_AM::no_shift &&
!(Subtarget->isCortexA9() || N.getOperand(0).hasOneUse())) {
- ShOpcVal = ARM_AM::getShiftOpcForNode(N.getOperand(0));
+ ShOpcVal = ARM_AM::getShiftOpcForNode(N.getOperand(0).getOpcode());
if (ShOpcVal != ARM_AM::no_shift) {
// Check to see if the RHS of the shift is a constant, if not, we can't
// fold it.
if (ConstantSDNode *Sh =
dyn_cast<ConstantSDNode>(N.getOperand(0).getOperand(1))) {
ShAmt = Sh->getZExtValue();
- if (!Subtarget->isCortexA9() ||
- (N.hasOneUse() &&
- isShifterOpProfitable(N.getOperand(0), ShOpcVal, ShAmt))) {
+ if (isShifterOpProfitable(N.getOperand(0), ShOpcVal, ShAmt)) {
Offset = N.getOperand(0).getOperand(0);
Base = N.getOperand(1);
} else {
@@ -630,7 +669,8 @@ AddrMode2Type ARMDAGToDAGISel::SelectAddrMode2Worker(SDValue N,
// Otherwise this is R +/- [possibly shifted] R.
ARM_AM::AddrOpc AddSub = N.getOpcode() != ISD::SUB ? ARM_AM::add:ARM_AM::sub;
- ARM_AM::ShiftOpc ShOpcVal = ARM_AM::getShiftOpcForNode(N.getOperand(1));
+ ARM_AM::ShiftOpc ShOpcVal =
+ ARM_AM::getShiftOpcForNode(N.getOperand(1).getOpcode());
unsigned ShAmt = 0;
Base = N.getOperand(0);
@@ -656,16 +696,14 @@ AddrMode2Type ARMDAGToDAGISel::SelectAddrMode2Worker(SDValue N,
// Try matching (R shl C) + (R).
if (N.getOpcode() != ISD::SUB && ShOpcVal == ARM_AM::no_shift &&
!(Subtarget->isCortexA9() || N.getOperand(0).hasOneUse())) {
- ShOpcVal = ARM_AM::getShiftOpcForNode(N.getOperand(0));
+ ShOpcVal = ARM_AM::getShiftOpcForNode(N.getOperand(0).getOpcode());
if (ShOpcVal != ARM_AM::no_shift) {
// Check to see if the RHS of the shift is a constant, if not, we can't
// fold it.
if (ConstantSDNode *Sh =
dyn_cast<ConstantSDNode>(N.getOperand(0).getOperand(1))) {
ShAmt = Sh->getZExtValue();
- if (!Subtarget->isCortexA9() ||
- (N.hasOneUse() &&
- isShifterOpProfitable(N.getOperand(0), ShOpcVal, ShAmt))) {
+ if (isShifterOpProfitable(N.getOperand(0), ShOpcVal, ShAmt)) {
Offset = N.getOperand(0).getOperand(0);
Base = N.getOperand(1);
} else {
@@ -683,7 +721,7 @@ AddrMode2Type ARMDAGToDAGISel::SelectAddrMode2Worker(SDValue N,
return AM2_SHOP;
}
-bool ARMDAGToDAGISel::SelectAddrMode2Offset(SDNode *Op, SDValue N,
+bool ARMDAGToDAGISel::SelectAddrMode2OffsetReg(SDNode *Op, SDValue N,
SDValue &Offset, SDValue &Opc) {
unsigned Opcode = Op->getOpcode();
ISD::MemIndexedMode AM = (Opcode == ISD::LOAD)
@@ -692,16 +730,11 @@ bool ARMDAGToDAGISel::SelectAddrMode2Offset(SDNode *Op, SDValue N,
ARM_AM::AddrOpc AddSub = (AM == ISD::PRE_INC || AM == ISD::POST_INC)
? ARM_AM::add : ARM_AM::sub;
int Val;
- if (isScaledConstantInRange(N, /*Scale=*/1, 0, 0x1000, Val)) { // 12 bits.
- Offset = CurDAG->getRegister(0, MVT::i32);
- Opc = CurDAG->getTargetConstant(ARM_AM::getAM2Opc(AddSub, Val,
- ARM_AM::no_shift),
- MVT::i32);
- return true;
- }
+ if (isScaledConstantInRange(N, /*Scale=*/1, 0, 0x1000, Val))
+ return false;
Offset = N;
- ARM_AM::ShiftOpc ShOpcVal = ARM_AM::getShiftOpcForNode(N);
+ ARM_AM::ShiftOpc ShOpcVal = ARM_AM::getShiftOpcForNode(N.getOpcode());
unsigned ShAmt = 0;
if (ShOpcVal != ARM_AM::no_shift) {
// Check to see if the RHS of the shift is a constant, if not, we can't fold
@@ -724,6 +757,50 @@ bool ARMDAGToDAGISel::SelectAddrMode2Offset(SDNode *Op, SDValue N,
return true;
}
+bool ARMDAGToDAGISel::SelectAddrMode2OffsetImmPre(SDNode *Op, SDValue N,
+ SDValue &Offset, SDValue &Opc) {
+ unsigned Opcode = Op->getOpcode();
+ ISD::MemIndexedMode AM = (Opcode == ISD::LOAD)
+ ? cast<LoadSDNode>(Op)->getAddressingMode()
+ : cast<StoreSDNode>(Op)->getAddressingMode();
+ ARM_AM::AddrOpc AddSub = (AM == ISD::PRE_INC || AM == ISD::POST_INC)
+ ? ARM_AM::add : ARM_AM::sub;
+ int Val;
+ if (isScaledConstantInRange(N, /*Scale=*/1, 0, 0x1000, Val)) { // 12 bits.
+ if (AddSub == ARM_AM::sub) Val *= -1;
+ Offset = CurDAG->getRegister(0, MVT::i32);
+ Opc = CurDAG->getTargetConstant(Val, MVT::i32);
+ return true;
+ }
+
+ return false;
+}
+
+
+bool ARMDAGToDAGISel::SelectAddrMode2OffsetImm(SDNode *Op, SDValue N,
+ SDValue &Offset, SDValue &Opc) {
+ unsigned Opcode = Op->getOpcode();
+ ISD::MemIndexedMode AM = (Opcode == ISD::LOAD)
+ ? cast<LoadSDNode>(Op)->getAddressingMode()
+ : cast<StoreSDNode>(Op)->getAddressingMode();
+ ARM_AM::AddrOpc AddSub = (AM == ISD::PRE_INC || AM == ISD::POST_INC)
+ ? ARM_AM::add : ARM_AM::sub;
+ int Val;
+ if (isScaledConstantInRange(N, /*Scale=*/1, 0, 0x1000, Val)) { // 12 bits.
+ Offset = CurDAG->getRegister(0, MVT::i32);
+ Opc = CurDAG->getTargetConstant(ARM_AM::getAM2Opc(AddSub, Val,
+ ARM_AM::no_shift),
+ MVT::i32);
+ return true;
+ }
+
+ return false;
+}
+
+bool ARMDAGToDAGISel::SelectAddrOffsetNone(SDValue N, SDValue &Base) {
+ Base = N;
+ return true;
+}
bool ARMDAGToDAGISel::SelectAddrMode3(SDValue N,
SDValue &Base, SDValue &Offset,
@@ -1079,7 +1156,7 @@ bool ARMDAGToDAGISel::SelectT2ShifterOperandReg(SDValue N, SDValue &BaseReg,
if (DisableShifterOp)
return false;
- ARM_AM::ShiftOpc ShOpcVal = ARM_AM::getShiftOpcForNode(N);
+ ARM_AM::ShiftOpc ShOpcVal = ARM_AM::getShiftOpcForNode(N.getOpcode());
// Don't match base register only case. That is matched to a separate
// lower complexity pattern with explicit register operand.
@@ -1208,21 +1285,15 @@ bool ARMDAGToDAGISel::SelectT2AddrModeSoReg(SDValue N,
return false;
}
- if (Subtarget->isCortexA9() && !N.hasOneUse()) {
- // Compute R + (R << [1,2,3]) and reuse it.
- Base = N;
- return false;
- }
-
// Look for (R + R) or (R + (R << [1,2,3])).
unsigned ShAmt = 0;
Base = N.getOperand(0);
OffReg = N.getOperand(1);
// Swap if it is ((R << c) + R).
- ARM_AM::ShiftOpc ShOpcVal = ARM_AM::getShiftOpcForNode(OffReg);
+ ARM_AM::ShiftOpc ShOpcVal = ARM_AM::getShiftOpcForNode(OffReg.getOpcode());
if (ShOpcVal != ARM_AM::lsl) {
- ShOpcVal = ARM_AM::getShiftOpcForNode(Base);
+ ShOpcVal = ARM_AM::getShiftOpcForNode(Base.getOpcode());
if (ShOpcVal == ARM_AM::lsl)
std::swap(Base, OffReg);
}
@@ -1266,10 +1337,19 @@ SDNode *ARMDAGToDAGISel::SelectARMIndexedLoad(SDNode *N) {
bool isPre = (AM == ISD::PRE_INC) || (AM == ISD::PRE_DEC);
unsigned Opcode = 0;
bool Match = false;
- if (LoadedVT == MVT::i32 &&
- SelectAddrMode2Offset(N, LD->getOffset(), Offset, AMOpc)) {
- Opcode = isPre ? ARM::LDR_PRE : ARM::LDR_POST;
+ if (LoadedVT == MVT::i32 && isPre &&
+ SelectAddrMode2OffsetImmPre(N, LD->getOffset(), Offset, AMOpc)) {
+ Opcode = ARM::LDR_PRE_IMM;
+ Match = true;
+ } else if (LoadedVT == MVT::i32 && !isPre &&
+ SelectAddrMode2OffsetImm(N, LD->getOffset(), Offset, AMOpc)) {
+ Opcode = ARM::LDR_POST_IMM;
Match = true;
+ } else if (LoadedVT == MVT::i32 &&
+ SelectAddrMode2OffsetReg(N, LD->getOffset(), Offset, AMOpc)) {
+ Opcode = isPre ? ARM::LDR_PRE_REG : ARM::LDR_POST_REG;
+ Match = true;
+
} else if (LoadedVT == MVT::i16 &&
SelectAddrMode3Offset(N, LD->getOffset(), Offset, AMOpc)) {
Match = true;
@@ -1283,20 +1363,37 @@ SDNode *ARMDAGToDAGISel::SelectARMIndexedLoad(SDNode *N) {
Opcode = isPre ? ARM::LDRSB_PRE : ARM::LDRSB_POST;
}
} else {
- if (SelectAddrMode2Offset(N, LD->getOffset(), Offset, AMOpc)) {
+ if (isPre &&
+ SelectAddrMode2OffsetImmPre(N, LD->getOffset(), Offset, AMOpc)) {
+ Match = true;
+ Opcode = ARM::LDRB_PRE_IMM;
+ } else if (!isPre &&
+ SelectAddrMode2OffsetImm(N, LD->getOffset(), Offset, AMOpc)) {
+ Match = true;
+ Opcode = ARM::LDRB_POST_IMM;
+ } else if (SelectAddrMode2OffsetReg(N, LD->getOffset(), Offset, AMOpc)) {
Match = true;
- Opcode = isPre ? ARM::LDRB_PRE : ARM::LDRB_POST;
+ Opcode = isPre ? ARM::LDRB_PRE_REG : ARM::LDRB_POST_REG;
}
}
}
if (Match) {
- SDValue Chain = LD->getChain();
- SDValue Base = LD->getBasePtr();
- SDValue Ops[]= { Base, Offset, AMOpc, getAL(CurDAG),
- CurDAG->getRegister(0, MVT::i32), Chain };
- return CurDAG->getMachineNode(Opcode, N->getDebugLoc(), MVT::i32, MVT::i32,
- MVT::Other, Ops, 6);
+ if (Opcode == ARM::LDR_PRE_IMM || Opcode == ARM::LDRB_PRE_IMM) {
+ SDValue Chain = LD->getChain();
+ SDValue Base = LD->getBasePtr();
+ SDValue Ops[]= { Base, AMOpc, getAL(CurDAG),
+ CurDAG->getRegister(0, MVT::i32), Chain };
+ return CurDAG->getMachineNode(Opcode, N->getDebugLoc(), MVT::i32,
+ MVT::i32, MVT::Other, Ops, 5);
+ } else {
+ SDValue Chain = LD->getChain();
+ SDValue Base = LD->getBasePtr();
+ SDValue Ops[]= { Base, Offset, AMOpc, getAL(CurDAG),
+ CurDAG->getRegister(0, MVT::i32), Chain };
+ return CurDAG->getMachineNode(Opcode, N->getDebugLoc(), MVT::i32,
+ MVT::i32, MVT::Other, Ops, 6);
+ }
}
return NULL;
@@ -1966,7 +2063,8 @@ SDNode *ARMDAGToDAGISel::SelectV6T2BitfieldExtractOp(SDNode *N,
Srl_imm)) {
assert(Srl_imm > 0 && Srl_imm < 32 && "bad amount in shift node!");
- unsigned Width = CountTrailingOnes_32(And_imm);
+ // Note: The width operand is encoded as width-1.
+ unsigned Width = CountTrailingOnes_32(And_imm) - 1;
unsigned LSB = Srl_imm;
SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
SDValue Ops[] = { N->getOperand(0).getOperand(0),
@@ -1986,7 +2084,8 @@ SDNode *ARMDAGToDAGISel::SelectV6T2BitfieldExtractOp(SDNode *N,
unsigned Srl_imm = 0;
if (isInt32Immediate(N->getOperand(1), Srl_imm)) {
assert(Srl_imm > 0 && Srl_imm < 32 && "bad amount in shift node!");
- unsigned Width = 32 - Srl_imm;
+ // Note: The width operand is encoded as width-1.
+ unsigned Width = 32 - Srl_imm - 1;
int LSB = Srl_imm - Shl_imm;
if (LSB < 0)
return NULL;
@@ -2034,10 +2133,16 @@ SelectARMCMOVShiftOp(SDNode *N, SDValue FalseVal, SDValue TrueVal,
SDValue CPTmp0;
SDValue CPTmp1;
SDValue CPTmp2;
- if (SelectShifterOperandReg(TrueVal, CPTmp0, CPTmp1, CPTmp2)) {
+ if (SelectImmShifterOperand(TrueVal, CPTmp0, CPTmp2)) {
+ SDValue CC = CurDAG->getTargetConstant(CCVal, MVT::i32);
+ SDValue Ops[] = { FalseVal, CPTmp0, CPTmp2, CC, CCR, InFlag };
+ return CurDAG->SelectNodeTo(N, ARM::MOVCCsi, MVT::i32, Ops, 6);
+ }
+
+ if (SelectRegShifterOperand(TrueVal, CPTmp0, CPTmp1, CPTmp2)) {
SDValue CC = CurDAG->getTargetConstant(CCVal, MVT::i32);
SDValue Ops[] = { FalseVal, CPTmp0, CPTmp1, CPTmp2, CC, CCR, InFlag };
- return CurDAG->SelectNodeTo(N, ARM::MOVCCs, MVT::i32, Ops, 7);
+ return CurDAG->SelectNodeTo(N, ARM::MOVCCsr, MVT::i32, Ops, 7);
}
return 0;
}
@@ -2198,6 +2303,56 @@ SDNode *ARMDAGToDAGISel::SelectCMOVOp(SDNode *N) {
return CurDAG->SelectNodeTo(N, Opc, VT, Ops, 5);
}
+/// Target-specific DAG combining for ISD::XOR.
+/// Target-independent combining lowers SELECT_CC nodes of the form
+/// select_cc setg[ge] X, 0, X, -X
+/// select_cc setgt X, -1, X, -X
+/// select_cc setl[te] X, 0, -X, X
+/// select_cc setlt X, 1, -X, X
+/// which represent Integer ABS into:
+/// Y = sra (X, size(X)-1); xor (add (X, Y), Y)
+/// ARM instruction selection detects the latter and matches it to
+/// ARM::ABS or ARM::t2ABS machine node.
+SDNode *ARMDAGToDAGISel::SelectABSOp(SDNode *N){
+ SDValue XORSrc0 = N->getOperand(0);
+ SDValue XORSrc1 = N->getOperand(1);
+ DebugLoc DL = N->getDebugLoc();
+ EVT VT = N->getValueType(0);
+
+ if (DisableARMIntABS)
+ return NULL;
+
+ if (Subtarget->isThumb1Only())
+ return NULL;
+
+ if (XORSrc0.getOpcode() != ISD::ADD ||
+ XORSrc1.getOpcode() != ISD::SRA)
+ return NULL;
+
+ SDValue ADDSrc0 = XORSrc0.getOperand(0);
+ SDValue ADDSrc1 = XORSrc0.getOperand(1);
+ SDValue SRASrc0 = XORSrc1.getOperand(0);
+ SDValue SRASrc1 = XORSrc1.getOperand(1);
+ ConstantSDNode *SRAConstant = dyn_cast<ConstantSDNode>(SRASrc1);
+ EVT XType = SRASrc0.getValueType();
+ unsigned Size = XType.getSizeInBits() - 1;
+
+ if (ADDSrc1 == XORSrc1 &&
+ ADDSrc0 == SRASrc0 &&
+ XType.isInteger() &&
+ SRAConstant != NULL &&
+ Size == SRAConstant->getZExtValue()) {
+
+ unsigned Opcode = ARM::ABS;
+ if (Subtarget->isThumb2())
+ Opcode = ARM::t2ABS;
+
+ return CurDAG->SelectNodeTo(N, Opcode, VT, ADDSrc0);
+ }
+
+ return NULL;
+}
+
SDNode *ARMDAGToDAGISel::SelectConcatVector(SDNode *N) {
// The only time a CONCAT_VECTORS operation can have legal types is when
// two 64-bit vectors are concatenated to a 128-bit vector.
@@ -2207,6 +2362,25 @@ SDNode *ARMDAGToDAGISel::SelectConcatVector(SDNode *N) {
return PairDRegs(VT, N->getOperand(0), N->getOperand(1));
}
+SDNode *ARMDAGToDAGISel::SelectAtomic64(SDNode *Node, unsigned Opc) {
+ SmallVector<SDValue, 6> Ops;
+ Ops.push_back(Node->getOperand(1)); // Ptr
+ Ops.push_back(Node->getOperand(2)); // Low part of Val1
+ Ops.push_back(Node->getOperand(3)); // High part of Val1
+ if (Opc == ARM::ATOMCMPXCHG6432) {
+ Ops.push_back(Node->getOperand(4)); // Low part of Val2
+ Ops.push_back(Node->getOperand(5)); // High part of Val2
+ }
+ Ops.push_back(Node->getOperand(0)); // Chain
+ MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1);
+ MemOp[0] = cast<MemSDNode>(Node)->getMemOperand();
+ SDNode *ResNode = CurDAG->getMachineNode(Opc, Node->getDebugLoc(),
+ MVT::i32, MVT::i32, MVT::Other,
+ Ops.data() ,Ops.size());
+ cast<MachineSDNode>(ResNode)->setMemRefs(MemOp, MemOp + 1);
+ return ResNode;
+}
+
SDNode *ARMDAGToDAGISel::Select(SDNode *N) {
DebugLoc dl = N->getDebugLoc();
@@ -2215,6 +2389,14 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) {
switch (N->getOpcode()) {
default: break;
+ case ISD::XOR: {
+ // Select special operations if XOR node forms integer ABS pattern
+ SDNode *ResNode = SelectABSOp(N);
+ if (ResNode)
+ return ResNode;
+ // Other cases are autogenerated.
+ break;
+ }
case ISD::Constant: {
unsigned Val = cast<ConstantSDNode>(N)->getZExtValue();
bool UseCP = true;
@@ -2269,8 +2451,9 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) {
int FI = cast<FrameIndexSDNode>(N)->getIndex();
SDValue TFI = CurDAG->getTargetFrameIndex(FI, TLI.getPointerTy());
if (Subtarget->isThumb1Only()) {
- return CurDAG->SelectNodeTo(N, ARM::tADDrSPi, MVT::i32, TFI,
- CurDAG->getTargetConstant(0, MVT::i32));
+ SDValue Ops[] = { TFI, CurDAG->getTargetConstant(0, MVT::i32),
+ getAL(CurDAG), CurDAG->getRegister(0, MVT::i32) };
+ return CurDAG->SelectNodeTo(N, ARM::tADDrSPi, MVT::i32, Ops, 4);
} else {
unsigned Opc = ((Subtarget->isThumb() && Subtarget->hasThumb2()) ?
ARM::t2ADDri : ARM::ADDri);
@@ -2307,7 +2490,7 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) {
return CurDAG->SelectNodeTo(N, ARM::t2ADDrs, MVT::i32, Ops, 6);
} else {
SDValue Ops[] = { V, V, Reg0, ShImmOp, getAL(CurDAG), Reg0, Reg0 };
- return CurDAG->SelectNodeTo(N, ARM::ADDrs, MVT::i32, Ops, 7);
+ return CurDAG->SelectNodeTo(N, ARM::ADDrsi, MVT::i32, Ops, 7);
}
}
if (isPowerOf2_32(RHSV+1)) { // 2^n-1?
@@ -2323,7 +2506,7 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) {
return CurDAG->SelectNodeTo(N, ARM::t2RSBrs, MVT::i32, Ops, 6);
} else {
SDValue Ops[] = { V, V, Reg0, ShImmOp, getAL(CurDAG), Reg0, Reg0 };
- return CurDAG->SelectNodeTo(N, ARM::RSBrs, MVT::i32, Ops, 7);
+ return CurDAG->SelectNodeTo(N, ARM::RSBrsi, MVT::i32, Ops, 7);
}
}
}
@@ -2986,6 +3169,23 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) {
case ISD::CONCAT_VECTORS:
return SelectConcatVector(N);
+
+ case ARMISD::ATOMOR64_DAG:
+ return SelectAtomic64(N, ARM::ATOMOR6432);
+ case ARMISD::ATOMXOR64_DAG:
+ return SelectAtomic64(N, ARM::ATOMXOR6432);
+ case ARMISD::ATOMADD64_DAG:
+ return SelectAtomic64(N, ARM::ATOMADD6432);
+ case ARMISD::ATOMSUB64_DAG:
+ return SelectAtomic64(N, ARM::ATOMSUB6432);
+ case ARMISD::ATOMNAND64_DAG:
+ return SelectAtomic64(N, ARM::ATOMNAND6432);
+ case ARMISD::ATOMAND64_DAG:
+ return SelectAtomic64(N, ARM::ATOMAND6432);
+ case ARMISD::ATOMSWAP64_DAG:
+ return SelectAtomic64(N, ARM::ATOMSWAP6432);
+ case ARMISD::ATOMCMPXCHG64_DAG:
+ return SelectAtomic64(N, ARM::ATOMCMPXCHG6432);
}
return SelectCode(N);
diff --git a/contrib/llvm/lib/Target/ARM/ARMISelLowering.cpp b/contrib/llvm/lib/Target/ARM/ARMISelLowering.cpp
index cf8c5ba..e44e356 100644
--- a/contrib/llvm/lib/Target/ARM/ARMISelLowering.cpp
+++ b/contrib/llvm/lib/Target/ARM/ARMISelLowering.cpp
@@ -14,7 +14,6 @@
#define DEBUG_TYPE "arm-isel"
#include "ARM.h"
-#include "ARMAddressingModes.h"
#include "ARMCallingConv.h"
#include "ARMConstantPoolValue.h"
#include "ARMISelLowering.h"
@@ -24,6 +23,7 @@
#include "ARMSubtarget.h"
#include "ARMTargetMachine.h"
#include "ARMTargetObjectFile.h"
+#include "MCTargetDesc/ARMAddressingModes.h"
#include "llvm/CallingConv.h"
#include "llvm/Constants.h"
#include "llvm/Function.h"
@@ -38,6 +38,7 @@
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/PseudoSourceValue.h"
#include "llvm/CodeGen/SelectionDAG.h"
@@ -106,7 +107,7 @@ void ARMTargetLowering::addTypeForNEON(EVT VT, EVT PromotedLdStVT,
EVT ElemTy = VT.getVectorElementType();
if (ElemTy != MVT::i64 && ElemTy != MVT::f64)
- setOperationAction(ISD::VSETCC, VT.getSimpleVT(), Custom);
+ setOperationAction(ISD::SETCC, VT.getSimpleVT(), Custom);
setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT.getSimpleVT(), Custom);
if (ElemTy != MVT::i32) {
setOperationAction(ISD::SINT_TO_FP, VT.getSimpleVT(), Expand);
@@ -178,6 +179,8 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM)
RegInfo = TM.getRegisterInfo();
Itins = TM.getInstrItineraryData();
+ setBooleanVectorContents(ZeroOrNegativeOneBooleanContent);
+
if (Subtarget->isTargetDarwin()) {
// Uses VFP for Thumb libfuncs if available.
if (Subtarget->isThumb() && Subtarget->hasVFP2()) {
@@ -419,6 +422,13 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM)
setLibcallName(RTLIB::MEMSET, "__aeabi_memset");
}
+ // Use divmod compiler-rt calls for iOS 5.0 and later.
+ if (Subtarget->getTargetTriple().getOS() == Triple::IOS &&
+ !Subtarget->getTargetTriple().isOSVersionLT(5, 0)) {
+ setLibcallName(RTLIB::SDIVREM_I32, "__divmodsi4");
+ setLibcallName(RTLIB::UDIVREM_I32, "__udivmodsi4");
+ }
+
if (Subtarget->isThumb1Only())
addRegisterClass(MVT::i32, ARM::tGPRRegisterClass);
else
@@ -453,7 +463,7 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM)
setOperationAction(ISD::FDIV, MVT::v2f64, Expand);
setOperationAction(ISD::FREM, MVT::v2f64, Expand);
setOperationAction(ISD::FCOPYSIGN, MVT::v2f64, Expand);
- setOperationAction(ISD::VSETCC, MVT::v2f64, Expand);
+ setOperationAction(ISD::SETCC, MVT::v2f64, Expand);
setOperationAction(ISD::FNEG, MVT::v2f64, Expand);
setOperationAction(ISD::FABS, MVT::v2f64, Expand);
setOperationAction(ISD::FSQRT, MVT::v2f64, Expand);
@@ -485,8 +495,8 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM)
setOperationAction(ISD::SDIV, MVT::v8i8, Custom);
setOperationAction(ISD::UDIV, MVT::v4i16, Custom);
setOperationAction(ISD::UDIV, MVT::v8i8, Custom);
- setOperationAction(ISD::VSETCC, MVT::v1i64, Expand);
- setOperationAction(ISD::VSETCC, MVT::v2i64, Expand);
+ setOperationAction(ISD::SETCC, MVT::v1i64, Expand);
+ setOperationAction(ISD::SETCC, MVT::v2i64, Expand);
// Neon does not have single instruction SINT_TO_FP and UINT_TO_FP with
// a destination type that is wider than the source.
setOperationAction(ISD::SINT_TO_FP, MVT::v4i16, Custom);
@@ -551,6 +561,14 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM)
setOperationAction(ISD::SRL, MVT::i64, Custom);
setOperationAction(ISD::SRA, MVT::i64, Custom);
+ if (!Subtarget->isThumb1Only()) {
+ // FIXME: We should do this for Thumb1 as well.
+ setOperationAction(ISD::ADDC, MVT::i32, Custom);
+ setOperationAction(ISD::ADDE, MVT::i32, Custom);
+ setOperationAction(ISD::SUBC, MVT::i32, Custom);
+ setOperationAction(ISD::SUBE, MVT::i32, Custom);
+ }
+
// ARM does not have ROTL.
setOperationAction(ISD::ROTL, MVT::i32, Expand);
setOperationAction(ISD::CTTZ, MVT::i32, Custom);
@@ -596,62 +614,46 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM)
setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32, Expand);
// ARMv6 Thumb1 (except for CPUs that support dmb / dsb) and earlier use
// the default expansion.
+ // FIXME: This should be checking for v6k, not just v6.
if (Subtarget->hasDataBarrier() ||
(Subtarget->hasV6Ops() && !Subtarget->isThumb())) {
// membarrier needs custom lowering; the rest are legal and handled
// normally.
setOperationAction(ISD::MEMBARRIER, MVT::Other, Custom);
+ setOperationAction(ISD::ATOMIC_FENCE, MVT::Other, Custom);
+ // Custom lowering for 64-bit ops
+ setOperationAction(ISD::ATOMIC_LOAD_ADD, MVT::i64, Custom);
+ setOperationAction(ISD::ATOMIC_LOAD_SUB, MVT::i64, Custom);
+ setOperationAction(ISD::ATOMIC_LOAD_AND, MVT::i64, Custom);
+ setOperationAction(ISD::ATOMIC_LOAD_OR, MVT::i64, Custom);
+ setOperationAction(ISD::ATOMIC_LOAD_XOR, MVT::i64, Custom);
+ setOperationAction(ISD::ATOMIC_SWAP, MVT::i64, Custom);
+ setOperationAction(ISD::ATOMIC_CMP_SWAP, MVT::i64, Custom);
+ // Automatically insert fences (dmb ist) around ATOMIC_SWAP etc.
+ setInsertFencesForAtomic(true);
} else {
// Set them all for expansion, which will force libcalls.
setOperationAction(ISD::MEMBARRIER, MVT::Other, Expand);
- setOperationAction(ISD::ATOMIC_CMP_SWAP, MVT::i8, Expand);
- setOperationAction(ISD::ATOMIC_CMP_SWAP, MVT::i16, Expand);
+ setOperationAction(ISD::ATOMIC_FENCE, MVT::Other, Expand);
setOperationAction(ISD::ATOMIC_CMP_SWAP, MVT::i32, Expand);
- setOperationAction(ISD::ATOMIC_SWAP, MVT::i8, Expand);
- setOperationAction(ISD::ATOMIC_SWAP, MVT::i16, Expand);
setOperationAction(ISD::ATOMIC_SWAP, MVT::i32, Expand);
- setOperationAction(ISD::ATOMIC_LOAD_ADD, MVT::i8, Expand);
- setOperationAction(ISD::ATOMIC_LOAD_ADD, MVT::i16, Expand);
setOperationAction(ISD::ATOMIC_LOAD_ADD, MVT::i32, Expand);
- setOperationAction(ISD::ATOMIC_LOAD_SUB, MVT::i8, Expand);
- setOperationAction(ISD::ATOMIC_LOAD_SUB, MVT::i16, Expand);
setOperationAction(ISD::ATOMIC_LOAD_SUB, MVT::i32, Expand);
- setOperationAction(ISD::ATOMIC_LOAD_AND, MVT::i8, Expand);
- setOperationAction(ISD::ATOMIC_LOAD_AND, MVT::i16, Expand);
setOperationAction(ISD::ATOMIC_LOAD_AND, MVT::i32, Expand);
- setOperationAction(ISD::ATOMIC_LOAD_OR, MVT::i8, Expand);
- setOperationAction(ISD::ATOMIC_LOAD_OR, MVT::i16, Expand);
setOperationAction(ISD::ATOMIC_LOAD_OR, MVT::i32, Expand);
- setOperationAction(ISD::ATOMIC_LOAD_XOR, MVT::i8, Expand);
- setOperationAction(ISD::ATOMIC_LOAD_XOR, MVT::i16, Expand);
setOperationAction(ISD::ATOMIC_LOAD_XOR, MVT::i32, Expand);
- setOperationAction(ISD::ATOMIC_LOAD_NAND, MVT::i8, Expand);
- setOperationAction(ISD::ATOMIC_LOAD_NAND, MVT::i16, Expand);
setOperationAction(ISD::ATOMIC_LOAD_NAND, MVT::i32, Expand);
- setOperationAction(ISD::ATOMIC_LOAD_MIN, MVT::i8, Expand);
- setOperationAction(ISD::ATOMIC_LOAD_MIN, MVT::i16, Expand);
setOperationAction(ISD::ATOMIC_LOAD_MIN, MVT::i32, Expand);
- setOperationAction(ISD::ATOMIC_LOAD_MAX, MVT::i8, Expand);
- setOperationAction(ISD::ATOMIC_LOAD_MAX, MVT::i16, Expand);
setOperationAction(ISD::ATOMIC_LOAD_MAX, MVT::i32, Expand);
- setOperationAction(ISD::ATOMIC_LOAD_UMIN, MVT::i8, Expand);
- setOperationAction(ISD::ATOMIC_LOAD_UMIN, MVT::i16, Expand);
setOperationAction(ISD::ATOMIC_LOAD_UMIN, MVT::i32, Expand);
- setOperationAction(ISD::ATOMIC_LOAD_UMAX, MVT::i8, Expand);
- setOperationAction(ISD::ATOMIC_LOAD_UMAX, MVT::i16, Expand);
setOperationAction(ISD::ATOMIC_LOAD_UMAX, MVT::i32, Expand);
+ // Mark ATOMIC_LOAD and ATOMIC_STORE custom so we can handle the
+ // Unordered/Monotonic case.
+ setOperationAction(ISD::ATOMIC_LOAD, MVT::i32, Custom);
+ setOperationAction(ISD::ATOMIC_STORE, MVT::i32, Custom);
// Since the libcalls include locking, fold in the fences
setShouldFoldAtomicFences(true);
}
- // 64-bit versions are always libcalls (for now)
- setOperationAction(ISD::ATOMIC_CMP_SWAP, MVT::i64, Expand);
- setOperationAction(ISD::ATOMIC_SWAP, MVT::i64, Expand);
- setOperationAction(ISD::ATOMIC_LOAD_ADD, MVT::i64, Expand);
- setOperationAction(ISD::ATOMIC_LOAD_SUB, MVT::i64, Expand);
- setOperationAction(ISD::ATOMIC_LOAD_AND, MVT::i64, Expand);
- setOperationAction(ISD::ATOMIC_LOAD_OR, MVT::i64, Expand);
- setOperationAction(ISD::ATOMIC_LOAD_XOR, MVT::i64, Expand);
- setOperationAction(ISD::ATOMIC_LOAD_NAND, MVT::i64, Expand);
setOperationAction(ISD::PREFETCH, MVT::Other, Custom);
@@ -839,6 +841,11 @@ const char *ARMTargetLowering::getTargetNodeName(unsigned Opcode) const {
case ARMISD::SRA_FLAG: return "ARMISD::SRA_FLAG";
case ARMISD::RRX: return "ARMISD::RRX";
+ case ARMISD::ADDC: return "ARMISD::ADDC";
+ case ARMISD::ADDE: return "ARMISD::ADDE";
+ case ARMISD::SUBC: return "ARMISD::SUBC";
+ case ARMISD::SUBE: return "ARMISD::SUBE";
+
case ARMISD::VMOVRRD: return "ARMISD::VMOVRRD";
case ARMISD::VMOVDRR: return "ARMISD::VMOVDRR";
@@ -935,6 +942,11 @@ const char *ARMTargetLowering::getTargetNodeName(unsigned Opcode) const {
}
}
+EVT ARMTargetLowering::getSetCCResultType(EVT VT) const {
+ if (!VT.isVector()) return getPointerTy();
+ return VT.changeVectorElementTypeToInteger();
+}
+
/// getRegClassFor - Return the register class that should be used for the
/// specified value type.
TargetRegisterClass *ARMTargetLowering::getRegClassFor(EVT VT) const {
@@ -1210,8 +1222,8 @@ ARMTargetLowering::LowerCall(SDValue Chain, SDValue Callee,
MachineFunction &MF = DAG.getMachineFunction();
bool IsStructRet = (Outs.empty()) ? false : Outs[0].Flags.isSRet();
bool IsSibCall = false;
- // Temporarily disable tail calls so things don't break.
- if (!EnableARMTailCalls)
+ // Disable tail calls if they're not supported.
+ if (!EnableARMTailCalls && !Subtarget->supportsTailCall())
isTailCall = false;
if (isTailCall) {
// Check if it's really possible to do a tail call.
@@ -1336,10 +1348,12 @@ ARMTargetLowering::LowerCall(SDValue Chain, SDValue Callee,
SDValue Src = DAG.getNode(ISD::ADD, dl, getPointerTy(), Arg, SrcOffset);
SDValue SizeNode = DAG.getConstant(Flags.getByValSize() - 4*offset,
MVT::i32);
+ // TODO: Disable AlwaysInline when it becomes possible
+ // to emit a nested call sequence.
MemOpChains.push_back(DAG.getMemcpy(Chain, dl, Dst, Src, SizeNode,
Flags.getByValAlign(),
/*isVolatile=*/false,
- /*AlwaysInline=*/false,
+ /*AlwaysInline=*/true,
MachinePointerInfo(0),
MachinePointerInfo(0)));
@@ -1404,9 +1418,9 @@ ARMTargetLowering::LowerCall(SDValue Chain, SDValue Callee,
const GlobalValue *GV = G->getGlobal();
// Create a constant pool entry for the callee address
unsigned ARMPCLabelIndex = AFI->createPICLabelUId();
- ARMConstantPoolValue *CPV = new ARMConstantPoolValue(GV,
- ARMPCLabelIndex,
- ARMCP::CPValue, 0);
+ ARMConstantPoolValue *CPV =
+ ARMConstantPoolConstant::Create(GV, ARMPCLabelIndex, ARMCP::CPValue, 0);
+
// Get the address of the callee into a register
SDValue CPAddr = DAG.getTargetConstantPool(CPV, getPointerTy(), 4);
CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
@@ -1419,8 +1433,9 @@ ARMTargetLowering::LowerCall(SDValue Chain, SDValue Callee,
// Create a constant pool entry for the callee address
unsigned ARMPCLabelIndex = AFI->createPICLabelUId();
- ARMConstantPoolValue *CPV = new ARMConstantPoolValue(*DAG.getContext(),
- Sym, ARMPCLabelIndex, 0);
+ ARMConstantPoolValue *CPV =
+ ARMConstantPoolSymbol::Create(*DAG.getContext(), Sym,
+ ARMPCLabelIndex, 0);
// Get the address of the callee into a register
SDValue CPAddr = DAG.getTargetConstantPool(CPV, getPointerTy(), 4);
CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
@@ -1441,9 +1456,8 @@ ARMTargetLowering::LowerCall(SDValue Chain, SDValue Callee,
// tBX takes a register source operand.
if (isARMFunc && Subtarget->isThumb1Only() && !Subtarget->hasV5TOps()) {
unsigned ARMPCLabelIndex = AFI->createPICLabelUId();
- ARMConstantPoolValue *CPV = new ARMConstantPoolValue(GV,
- ARMPCLabelIndex,
- ARMCP::CPValue, 4);
+ ARMConstantPoolValue *CPV =
+ ARMConstantPoolConstant::Create(GV, ARMPCLabelIndex, ARMCP::CPValue, 4);
SDValue CPAddr = DAG.getTargetConstantPool(CPV, getPointerTy(), 4);
CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
Callee = DAG.getLoad(getPointerTy(), dl,
@@ -1470,8 +1484,9 @@ ARMTargetLowering::LowerCall(SDValue Chain, SDValue Callee,
const char *Sym = S->getSymbol();
if (isARMFunc && Subtarget->isThumb1Only() && !Subtarget->hasV5TOps()) {
unsigned ARMPCLabelIndex = AFI->createPICLabelUId();
- ARMConstantPoolValue *CPV = new ARMConstantPoolValue(*DAG.getContext(),
- Sym, ARMPCLabelIndex, 4);
+ ARMConstantPoolValue *CPV =
+ ARMConstantPoolSymbol::Create(*DAG.getContext(), Sym,
+ ARMPCLabelIndex, 4);
SDValue CPAddr = DAG.getTargetConstantPool(CPV, getPointerTy(), 4);
CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
Callee = DAG.getLoad(getPointerTy(), dl,
@@ -1940,9 +1955,9 @@ SDValue ARMTargetLowering::LowerBlockAddress(SDValue Op,
} else {
unsigned PCAdj = Subtarget->isThumb() ? 4 : 8;
ARMPCLabelIndex = AFI->createPICLabelUId();
- ARMConstantPoolValue *CPV = new ARMConstantPoolValue(BA, ARMPCLabelIndex,
- ARMCP::CPBlockAddress,
- PCAdj);
+ ARMConstantPoolValue *CPV =
+ ARMConstantPoolConstant::Create(BA, ARMPCLabelIndex,
+ ARMCP::CPBlockAddress, PCAdj);
CPAddr = DAG.getTargetConstantPool(CPV, PtrVT, 4);
}
CPAddr = DAG.getNode(ARMISD::Wrapper, DL, PtrVT, CPAddr);
@@ -1966,8 +1981,8 @@ ARMTargetLowering::LowerToTLSGeneralDynamicModel(GlobalAddressSDNode *GA,
ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
unsigned ARMPCLabelIndex = AFI->createPICLabelUId();
ARMConstantPoolValue *CPV =
- new ARMConstantPoolValue(GA->getGlobal(), ARMPCLabelIndex,
- ARMCP::CPValue, PCAdj, ARMCP::TLSGD, true);
+ ARMConstantPoolConstant::Create(GA->getGlobal(), ARMPCLabelIndex,
+ ARMCP::CPValue, PCAdj, ARMCP::TLSGD, true);
SDValue Argument = DAG.getTargetConstantPool(CPV, PtrVT, 4);
Argument = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, Argument);
Argument = DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), Argument,
@@ -1982,11 +1997,11 @@ ARMTargetLowering::LowerToTLSGeneralDynamicModel(GlobalAddressSDNode *GA,
ArgListTy Args;
ArgListEntry Entry;
Entry.Node = Argument;
- Entry.Ty = (const Type *) Type::getInt32Ty(*DAG.getContext());
+ Entry.Ty = (Type *) Type::getInt32Ty(*DAG.getContext());
Args.push_back(Entry);
// FIXME: is there useful debug info available here?
std::pair<SDValue, SDValue> CallResult =
- LowerCallTo(Chain, (const Type *) Type::getInt32Ty(*DAG.getContext()),
+ LowerCallTo(Chain, (Type *) Type::getInt32Ty(*DAG.getContext()),
false, false, false, false,
0, CallingConv::C, false, /*isReturnValueUsed=*/true,
DAG.getExternalSymbol("__tls_get_addr", PtrVT), Args, DAG, dl);
@@ -2013,8 +2028,9 @@ ARMTargetLowering::LowerToTLSExecModels(GlobalAddressSDNode *GA,
// Initial exec model.
unsigned char PCAdj = Subtarget->isThumb() ? 4 : 8;
ARMConstantPoolValue *CPV =
- new ARMConstantPoolValue(GA->getGlobal(), ARMPCLabelIndex,
- ARMCP::CPValue, PCAdj, ARMCP::GOTTPOFF, true);
+ ARMConstantPoolConstant::Create(GA->getGlobal(), ARMPCLabelIndex,
+ ARMCP::CPValue, PCAdj, ARMCP::GOTTPOFF,
+ true);
Offset = DAG.getTargetConstantPool(CPV, PtrVT, 4);
Offset = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, Offset);
Offset = DAG.getLoad(PtrVT, dl, Chain, Offset,
@@ -2030,7 +2046,8 @@ ARMTargetLowering::LowerToTLSExecModels(GlobalAddressSDNode *GA,
false, false, 0);
} else {
// local exec model
- ARMConstantPoolValue *CPV = new ARMConstantPoolValue(GV, ARMCP::TPOFF);
+ ARMConstantPoolValue *CPV =
+ ARMConstantPoolConstant::Create(GV, ARMCP::TPOFF);
Offset = DAG.getTargetConstantPool(CPV, PtrVT, 4);
Offset = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, Offset);
Offset = DAG.getLoad(PtrVT, dl, Chain, Offset,
@@ -2066,7 +2083,8 @@ SDValue ARMTargetLowering::LowerGlobalAddressELF(SDValue Op,
if (RelocM == Reloc::PIC_) {
bool UseGOTOFF = GV->hasLocalLinkage() || GV->hasHiddenVisibility();
ARMConstantPoolValue *CPV =
- new ARMConstantPoolValue(GV, UseGOTOFF ? ARMCP::GOTOFF : ARMCP::GOT);
+ ARMConstantPoolConstant::Create(GV,
+ UseGOTOFF ? ARMCP::GOTOFF : ARMCP::GOT);
SDValue CPAddr = DAG.getTargetConstantPool(CPV, PtrVT, 4);
CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
SDValue Result = DAG.getLoad(PtrVT, dl, DAG.getEntryNode(),
@@ -2135,7 +2153,8 @@ SDValue ARMTargetLowering::LowerGlobalAddressDarwin(SDValue Op,
ARMPCLabelIndex = AFI->createPICLabelUId();
unsigned PCAdj = (RelocM != Reloc::PIC_) ? 0 : (Subtarget->isThumb()?4:8);
ARMConstantPoolValue *CPV =
- new ARMConstantPoolValue(GV, ARMPCLabelIndex, ARMCP::CPValue, PCAdj);
+ ARMConstantPoolConstant::Create(GV, ARMPCLabelIndex, ARMCP::CPValue,
+ PCAdj);
CPAddr = DAG.getTargetConstantPool(CPV, PtrVT, 4);
}
CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
@@ -2167,9 +2186,9 @@ SDValue ARMTargetLowering::LowerGLOBAL_OFFSET_TABLE(SDValue Op,
EVT PtrVT = getPointerTy();
DebugLoc dl = Op.getDebugLoc();
unsigned PCAdj = Subtarget->isThumb() ? 4 : 8;
- ARMConstantPoolValue *CPV = new ARMConstantPoolValue(*DAG.getContext(),
- "_GLOBAL_OFFSET_TABLE_",
- ARMPCLabelIndex, PCAdj);
+ ARMConstantPoolValue *CPV =
+ ARMConstantPoolSymbol::Create(*DAG.getContext(), "_GLOBAL_OFFSET_TABLE_",
+ ARMPCLabelIndex, PCAdj);
SDValue CPAddr = DAG.getTargetConstantPool(CPV, PtrVT, 4);
CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
SDValue Result = DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), CPAddr,
@@ -2191,7 +2210,8 @@ SDValue
ARMTargetLowering::LowerEH_SJLJ_SETJMP(SDValue Op, SelectionDAG &DAG) const {
DebugLoc dl = Op.getDebugLoc();
SDValue Val = DAG.getConstant(0, MVT::i32);
- return DAG.getNode(ARMISD::EH_SJLJ_SETJMP, dl, MVT::i32, Op.getOperand(0),
+ return DAG.getNode(ARMISD::EH_SJLJ_SETJMP, dl,
+ DAG.getVTList(MVT::i32, MVT::Other), Op.getOperand(0),
Op.getOperand(1), Val);
}
@@ -2224,8 +2244,8 @@ ARMTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG,
unsigned PCAdj = (RelocM != Reloc::PIC_)
? 0 : (Subtarget->isThumb() ? 4 : 8);
ARMConstantPoolValue *CPV =
- new ARMConstantPoolValue(MF.getFunction(), ARMPCLabelIndex,
- ARMCP::CPLSDA, PCAdj);
+ ARMConstantPoolConstant::Create(MF.getFunction(), ARMPCLabelIndex,
+ ARMCP::CPLSDA, PCAdj);
CPAddr = DAG.getTargetConstantPool(CPV, PtrVT, 4);
CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
SDValue Result =
@@ -2277,6 +2297,25 @@ static SDValue LowerMEMBARRIER(SDValue Op, SelectionDAG &DAG,
DAG.getConstant(DMBOpt, MVT::i32));
}
+
+static SDValue LowerATOMIC_FENCE(SDValue Op, SelectionDAG &DAG,
+ const ARMSubtarget *Subtarget) {
+ // FIXME: handle "fence singlethread" more efficiently.
+ DebugLoc dl = Op.getDebugLoc();
+ if (!Subtarget->hasDataBarrier()) {
+ // Some ARMv6 cpus can support data barriers with an mcr instruction.
+ // Thumb1 and pre-v6 ARM mode use a libcall instead and should never get
+ // here.
+ assert(Subtarget->hasV6Ops() && !Subtarget->isThumb() &&
+ "Unexpected ISD::MEMBARRIER encountered. Should be libcall!");
+ return DAG.getNode(ARMISD::MEMBARRIER_MCR, dl, MVT::Other, Op.getOperand(0),
+ DAG.getConstant(0, MVT::i32));
+ }
+
+ return DAG.getNode(ARMISD::MEMBARRIER, dl, MVT::Other, Op.getOperand(0),
+ DAG.getConstant(ARM_MB::ISH, MVT::i32));
+}
+
static SDValue LowerPREFETCH(SDValue Op, SelectionDAG &DAG,
const ARMSubtarget *Subtarget) {
// ARM pre v5TE and Thumb1 does not have preload instructions.
@@ -2754,7 +2793,7 @@ SDValue ARMTargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const {
SDValue ARMcc;
SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
SDValue Cmp = getARMCmp(LHS, RHS, CC, ARMcc, DAG, dl);
- return DAG.getNode(ARMISD::CMOV, dl, VT, FalseVal, TrueVal, ARMcc, CCR, Cmp);
+ return DAG.getNode(ARMISD::CMOV, dl, VT, FalseVal, TrueVal, ARMcc, CCR,Cmp);
}
ARMCC::CondCodes CondCode, CondCode2;
@@ -2993,8 +3032,8 @@ static SDValue LowerVectorINT_TO_FP(SDValue Op, SelectionDAG &DAG) {
EVT VT = Op.getValueType();
DebugLoc dl = Op.getDebugLoc();
- EVT OperandVT = Op.getOperand(0).getValueType();
- assert(OperandVT == MVT::v4i16 && "Invalid type for custom lowering!");
+ assert(Op.getOperand(0).getValueType() == MVT::v4i16 &&
+ "Invalid type for custom lowering!");
if (VT != MVT::v4f32)
return DAG.UnrollVectorOp(Op.getNode());
@@ -3905,8 +3944,7 @@ SDValue ARMTargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG,
}
// Try an immediate VMVN.
- uint64_t NegatedImm = (SplatBits.getZExtValue() ^
- ((1LL << SplatBitSize) - 1));
+ uint64_t NegatedImm = (~SplatBits).getZExtValue();
Val = isNEONModifiedImm(NegatedImm,
SplatUndef.getZExtValue(), SplatBitSize,
DAG, VmovVT, VT.is128BitVector(),
@@ -4019,6 +4057,14 @@ SDValue ARMTargetLowering::ReconstructShuffle(SDValue Op,
// A shuffle can only come from building a vector from various
// elements of other vectors.
return SDValue();
+ } else if (V.getOperand(0).getValueType().getVectorElementType() !=
+ VT.getVectorElementType()) {
+ // This code doesn't know how to handle shuffles where the vector
+ // element types do not match (this happens because type legalization
+ // promotes the return type of EXTRACT_VECTOR_ELT).
+ // FIXME: It might be appropriate to extend this code to handle
+ // mismatched types.
+ return SDValue();
}
// Record this extraction against the appropriate vector if possible...
@@ -4819,6 +4865,71 @@ static SDValue LowerUDIV(SDValue Op, SelectionDAG &DAG) {
return N0;
}
+static SDValue LowerADDC_ADDE_SUBC_SUBE(SDValue Op, SelectionDAG &DAG) {
+ EVT VT = Op.getNode()->getValueType(0);
+ SDVTList VTs = DAG.getVTList(VT, MVT::i32);
+
+ unsigned Opc;
+ bool ExtraOp = false;
+ switch (Op.getOpcode()) {
+ default: assert(0 && "Invalid code");
+ case ISD::ADDC: Opc = ARMISD::ADDC; break;
+ case ISD::ADDE: Opc = ARMISD::ADDE; ExtraOp = true; break;
+ case ISD::SUBC: Opc = ARMISD::SUBC; break;
+ case ISD::SUBE: Opc = ARMISD::SUBE; ExtraOp = true; break;
+ }
+
+ if (!ExtraOp)
+ return DAG.getNode(Opc, Op->getDebugLoc(), VTs, Op.getOperand(0),
+ Op.getOperand(1));
+ return DAG.getNode(Opc, Op->getDebugLoc(), VTs, Op.getOperand(0),
+ Op.getOperand(1), Op.getOperand(2));
+}
+
+static SDValue LowerAtomicLoadStore(SDValue Op, SelectionDAG &DAG) {
+ // Monotonic load/store is legal for all targets
+ if (cast<AtomicSDNode>(Op)->getOrdering() <= Monotonic)
+ return Op;
+
+ // Aquire/Release load/store is not legal for targets without a
+ // dmb or equivalent available.
+ return SDValue();
+}
+
+
+static void
+ReplaceATOMIC_OP_64(SDNode *Node, SmallVectorImpl<SDValue>& Results,
+ SelectionDAG &DAG, unsigned NewOp) {
+ DebugLoc dl = Node->getDebugLoc();
+ assert (Node->getValueType(0) == MVT::i64 &&
+ "Only know how to expand i64 atomics");
+
+ SmallVector<SDValue, 6> Ops;
+ Ops.push_back(Node->getOperand(0)); // Chain
+ Ops.push_back(Node->getOperand(1)); // Ptr
+ // Low part of Val1
+ Ops.push_back(DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32,
+ Node->getOperand(2), DAG.getIntPtrConstant(0)));
+ // High part of Val1
+ Ops.push_back(DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32,
+ Node->getOperand(2), DAG.getIntPtrConstant(1)));
+ if (NewOp == ARMISD::ATOMCMPXCHG64_DAG) {
+ // High part of Val1
+ Ops.push_back(DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32,
+ Node->getOperand(3), DAG.getIntPtrConstant(0)));
+ // High part of Val2
+ Ops.push_back(DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32,
+ Node->getOperand(3), DAG.getIntPtrConstant(1)));
+ }
+ SDVTList Tys = DAG.getVTList(MVT::i32, MVT::i32, MVT::Other);
+ SDValue Result =
+ DAG.getMemIntrinsicNode(NewOp, dl, Tys, Ops.data(), Ops.size(), MVT::i64,
+ cast<MemSDNode>(Node)->getMemOperand());
+ SDValue OpsF[] = { Result.getValue(0), Result.getValue(1) };
+ Results.push_back(DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, OpsF, 2));
+ Results.push_back(Result.getValue(2));
+}
+
SDValue ARMTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
switch (Op.getOpcode()) {
default: llvm_unreachable("Don't know how to custom lower this!");
@@ -4834,6 +4945,7 @@ SDValue ARMTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
case ISD::BR_JT: return LowerBR_JT(Op, DAG);
case ISD::VASTART: return LowerVASTART(Op, DAG);
case ISD::MEMBARRIER: return LowerMEMBARRIER(Op, DAG, Subtarget);
+ case ISD::ATOMIC_FENCE: return LowerATOMIC_FENCE(Op, DAG, Subtarget);
case ISD::PREFETCH: return LowerPREFETCH(Op, DAG, Subtarget);
case ISD::SINT_TO_FP:
case ISD::UINT_TO_FP: return LowerINT_TO_FP(Op, DAG);
@@ -4856,7 +4968,7 @@ SDValue ARMTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
case ISD::SRL_PARTS:
case ISD::SRA_PARTS: return LowerShiftRightParts(Op, DAG);
case ISD::CTTZ: return LowerCTTZ(Op.getNode(), DAG, Subtarget);
- case ISD::VSETCC: return LowerVSETCC(Op, DAG);
+ case ISD::SETCC: return LowerVSETCC(Op, DAG);
case ISD::BUILD_VECTOR: return LowerBUILD_VECTOR(Op, DAG, Subtarget);
case ISD::VECTOR_SHUFFLE: return LowerVECTOR_SHUFFLE(Op, DAG);
case ISD::EXTRACT_VECTOR_ELT: return LowerEXTRACT_VECTOR_ELT(Op, DAG);
@@ -4865,6 +4977,12 @@ SDValue ARMTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
case ISD::MUL: return LowerMUL(Op, DAG);
case ISD::SDIV: return LowerSDIV(Op, DAG);
case ISD::UDIV: return LowerUDIV(Op, DAG);
+ case ISD::ADDC:
+ case ISD::ADDE:
+ case ISD::SUBC:
+ case ISD::SUBE: return LowerADDC_ADDE_SUBC_SUBE(Op, DAG);
+ case ISD::ATOMIC_LOAD:
+ case ISD::ATOMIC_STORE: return LowerAtomicLoadStore(Op, DAG);
}
return SDValue();
}
@@ -4886,6 +5004,30 @@ void ARMTargetLowering::ReplaceNodeResults(SDNode *N,
case ISD::SRA:
Res = Expand64BitShift(N, DAG, Subtarget);
break;
+ case ISD::ATOMIC_LOAD_ADD:
+ ReplaceATOMIC_OP_64(N, Results, DAG, ARMISD::ATOMADD64_DAG);
+ return;
+ case ISD::ATOMIC_LOAD_AND:
+ ReplaceATOMIC_OP_64(N, Results, DAG, ARMISD::ATOMAND64_DAG);
+ return;
+ case ISD::ATOMIC_LOAD_NAND:
+ ReplaceATOMIC_OP_64(N, Results, DAG, ARMISD::ATOMNAND64_DAG);
+ return;
+ case ISD::ATOMIC_LOAD_OR:
+ ReplaceATOMIC_OP_64(N, Results, DAG, ARMISD::ATOMOR64_DAG);
+ return;
+ case ISD::ATOMIC_LOAD_SUB:
+ ReplaceATOMIC_OP_64(N, Results, DAG, ARMISD::ATOMSUB64_DAG);
+ return;
+ case ISD::ATOMIC_LOAD_XOR:
+ ReplaceATOMIC_OP_64(N, Results, DAG, ARMISD::ATOMXOR64_DAG);
+ return;
+ case ISD::ATOMIC_SWAP:
+ ReplaceATOMIC_OP_64(N, Results, DAG, ARMISD::ATOMSWAP64_DAG);
+ return;
+ case ISD::ATOMIC_CMP_SWAP:
+ ReplaceATOMIC_OP_64(N, Results, DAG, ARMISD::ATOMCMPXCHG64_DAG);
+ return;
}
if (Res.getNode())
Results.push_back(Res);
@@ -4963,7 +5105,10 @@ ARMTargetLowering::EmitAtomicCmpSwap(MachineInstr *MI,
// cmp dest, oldval
// bne exitMBB
BB = loop1MBB;
- AddDefaultPred(BuildMI(BB, dl, TII->get(ldrOpc), dest).addReg(ptr));
+ MachineInstrBuilder MIB = BuildMI(BB, dl, TII->get(ldrOpc), dest).addReg(ptr);
+ if (ldrOpc == ARM::t2LDREX)
+ MIB.addImm(0);
+ AddDefaultPred(MIB);
AddDefaultPred(BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2CMPrr : ARM::CMPrr))
.addReg(dest).addReg(oldval));
BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2Bcc : ARM::Bcc))
@@ -4976,8 +5121,10 @@ ARMTargetLowering::EmitAtomicCmpSwap(MachineInstr *MI,
// cmp scratch, #0
// bne loop1MBB
BB = loop2MBB;
- AddDefaultPred(BuildMI(BB, dl, TII->get(strOpc), scratch).addReg(newval)
- .addReg(ptr));
+ MIB = BuildMI(BB, dl, TII->get(strOpc), scratch).addReg(newval).addReg(ptr);
+ if (strOpc == ARM::t2STREX)
+ MIB.addImm(0);
+ AddDefaultPred(MIB);
AddDefaultPred(BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2CMPri : ARM::CMPri))
.addReg(scratch).addImm(0));
BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2Bcc : ARM::Bcc))
@@ -5063,7 +5210,10 @@ ARMTargetLowering::EmitAtomicBinary(MachineInstr *MI, MachineBasicBlock *BB,
// bne- loopMBB
// fallthrough --> exitMBB
BB = loopMBB;
- AddDefaultPred(BuildMI(BB, dl, TII->get(ldrOpc), dest).addReg(ptr));
+ MachineInstrBuilder MIB = BuildMI(BB, dl, TII->get(ldrOpc), dest).addReg(ptr);
+ if (ldrOpc == ARM::t2LDREX)
+ MIB.addImm(0);
+ AddDefaultPred(MIB);
if (BinOpcode) {
// operand order needs to go the other way for NAND
if (BinOpcode == ARM::BICrr || BinOpcode == ARM::t2BICrr)
@@ -5074,8 +5224,10 @@ ARMTargetLowering::EmitAtomicBinary(MachineInstr *MI, MachineBasicBlock *BB,
addReg(dest).addReg(incr)).addReg(0);
}
- AddDefaultPred(BuildMI(BB, dl, TII->get(strOpc), scratch).addReg(scratch2)
- .addReg(ptr));
+ MIB = BuildMI(BB, dl, TII->get(strOpc), scratch).addReg(scratch2).addReg(ptr);
+ if (strOpc == ARM::t2STREX)
+ MIB.addImm(0);
+ AddDefaultPred(MIB);
AddDefaultPred(BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2CMPri : ARM::CMPri))
.addReg(scratch).addImm(0));
BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2Bcc : ARM::Bcc))
@@ -5125,12 +5277,12 @@ ARMTargetLowering::EmitAtomicBinaryMinMax(MachineInstr *MI,
case 1:
ldrOpc = isThumb2 ? ARM::t2LDREXB : ARM::LDREXB;
strOpc = isThumb2 ? ARM::t2STREXB : ARM::STREXB;
- extendOpc = isThumb2 ? ARM::t2SXTBr : ARM::SXTBr;
+ extendOpc = isThumb2 ? ARM::t2SXTB : ARM::SXTB;
break;
case 2:
ldrOpc = isThumb2 ? ARM::t2LDREXH : ARM::LDREXH;
strOpc = isThumb2 ? ARM::t2STREXH : ARM::STREXH;
- extendOpc = isThumb2 ? ARM::t2SXTHr : ARM::SXTHr;
+ extendOpc = isThumb2 ? ARM::t2SXTH : ARM::SXTH;
break;
case 4:
ldrOpc = isThumb2 ? ARM::t2LDREX : ARM::LDREX;
@@ -5170,12 +5322,17 @@ ARMTargetLowering::EmitAtomicBinaryMinMax(MachineInstr *MI,
// bne- loopMBB
// fallthrough --> exitMBB
BB = loopMBB;
- AddDefaultPred(BuildMI(BB, dl, TII->get(ldrOpc), dest).addReg(ptr));
+ MachineInstrBuilder MIB = BuildMI(BB, dl, TII->get(ldrOpc), dest).addReg(ptr);
+ if (ldrOpc == ARM::t2LDREX)
+ MIB.addImm(0);
+ AddDefaultPred(MIB);
// Sign extend the value, if necessary.
if (signExtend && extendOpc) {
oldval = MRI.createVirtualRegister(ARM::GPRRegisterClass);
- AddDefaultPred(BuildMI(BB, dl, TII->get(extendOpc), oldval).addReg(dest));
+ AddDefaultPred(BuildMI(BB, dl, TII->get(extendOpc), oldval)
+ .addReg(dest)
+ .addImm(0));
}
// Build compare and cmov instructions.
@@ -5184,8 +5341,10 @@ ARMTargetLowering::EmitAtomicBinaryMinMax(MachineInstr *MI,
BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2MOVCCr : ARM::MOVCCr), scratch2)
.addReg(oldval).addReg(incr).addImm(Cond).addReg(ARM::CPSR);
- AddDefaultPred(BuildMI(BB, dl, TII->get(strOpc), scratch).addReg(scratch2)
- .addReg(ptr));
+ MIB = BuildMI(BB, dl, TII->get(strOpc), scratch).addReg(scratch2).addReg(ptr);
+ if (strOpc == ARM::t2STREX)
+ MIB.addImm(0);
+ AddDefaultPred(MIB);
AddDefaultPred(BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2CMPri : ARM::CMPri))
.addReg(scratch).addImm(0));
BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2Bcc : ARM::Bcc))
@@ -5203,79 +5362,596 @@ ARMTargetLowering::EmitAtomicBinaryMinMax(MachineInstr *MI,
return BB;
}
-static
-MachineBasicBlock *OtherSucc(MachineBasicBlock *MBB, MachineBasicBlock *Succ) {
- for (MachineBasicBlock::succ_iterator I = MBB->succ_begin(),
- E = MBB->succ_end(); I != E; ++I)
- if (*I != Succ)
- return *I;
- llvm_unreachable("Expecting a BB with two successors!");
-}
+MachineBasicBlock *
+ARMTargetLowering::EmitAtomicBinary64(MachineInstr *MI, MachineBasicBlock *BB,
+ unsigned Op1, unsigned Op2,
+ bool NeedsCarry, bool IsCmpxchg) const {
+ // This also handles ATOMIC_SWAP, indicated by Op1==0.
+ const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
-// FIXME: This opcode table should obviously be expressed in the target
-// description. We probably just need a "machine opcode" value in the pseudo
-// instruction. But the ideal solution maybe to simply remove the "S" version
-// of the opcode altogether.
-struct AddSubFlagsOpcodePair {
- unsigned PseudoOpc;
- unsigned MachineOpc;
-};
+ const BasicBlock *LLVM_BB = BB->getBasicBlock();
+ MachineFunction *MF = BB->getParent();
+ MachineFunction::iterator It = BB;
+ ++It;
-static AddSubFlagsOpcodePair AddSubFlagsOpcodeMap[] = {
- {ARM::ADCSri, ARM::ADCri},
- {ARM::ADCSrr, ARM::ADCrr},
- {ARM::ADCSrs, ARM::ADCrs},
- {ARM::SBCSri, ARM::SBCri},
- {ARM::SBCSrr, ARM::SBCrr},
- {ARM::SBCSrs, ARM::SBCrs},
- {ARM::RSBSri, ARM::RSBri},
- {ARM::RSBSrr, ARM::RSBrr},
- {ARM::RSBSrs, ARM::RSBrs},
- {ARM::RSCSri, ARM::RSCri},
- {ARM::RSCSrs, ARM::RSCrs},
- {ARM::t2ADCSri, ARM::t2ADCri},
- {ARM::t2ADCSrr, ARM::t2ADCrr},
- {ARM::t2ADCSrs, ARM::t2ADCrs},
- {ARM::t2SBCSri, ARM::t2SBCri},
- {ARM::t2SBCSrr, ARM::t2SBCrr},
- {ARM::t2SBCSrs, ARM::t2SBCrs},
- {ARM::t2RSBSri, ARM::t2RSBri},
- {ARM::t2RSBSrs, ARM::t2RSBrs},
-};
+ unsigned destlo = MI->getOperand(0).getReg();
+ unsigned desthi = MI->getOperand(1).getReg();
+ unsigned ptr = MI->getOperand(2).getReg();
+ unsigned vallo = MI->getOperand(3).getReg();
+ unsigned valhi = MI->getOperand(4).getReg();
+ DebugLoc dl = MI->getDebugLoc();
+ bool isThumb2 = Subtarget->isThumb2();
-// Convert and Add or Subtract with Carry and Flags to a generic opcode with
-// CPSR<def> operand. e.g. ADCS (...) -> ADC (... CPSR<def>).
-//
-// FIXME: Somewhere we should assert that CPSR<def> is in the correct
-// position to be recognized by the target descrition as the 'S' bit.
-bool ARMTargetLowering::RemapAddSubWithFlags(MachineInstr *MI,
- MachineBasicBlock *BB) const {
- unsigned OldOpc = MI->getOpcode();
- unsigned NewOpc = 0;
+ MachineRegisterInfo &MRI = BB->getParent()->getRegInfo();
+ if (isThumb2) {
+ MRI.constrainRegClass(destlo, ARM::rGPRRegisterClass);
+ MRI.constrainRegClass(desthi, ARM::rGPRRegisterClass);
+ MRI.constrainRegClass(ptr, ARM::rGPRRegisterClass);
+ }
- // This is only called for instructions that need remapping, so iterating over
- // the tiny opcode table is not costly.
- static const int NPairs =
- sizeof(AddSubFlagsOpcodeMap) / sizeof(AddSubFlagsOpcodePair);
- for (AddSubFlagsOpcodePair *Pair = &AddSubFlagsOpcodeMap[0],
- *End = &AddSubFlagsOpcodeMap[NPairs]; Pair != End; ++Pair) {
- if (OldOpc == Pair->PseudoOpc) {
- NewOpc = Pair->MachineOpc;
- break;
+ unsigned ldrOpc = isThumb2 ? ARM::t2LDREXD : ARM::LDREXD;
+ unsigned strOpc = isThumb2 ? ARM::t2STREXD : ARM::STREXD;
+
+ MachineBasicBlock *loopMBB = MF->CreateMachineBasicBlock(LLVM_BB);
+ MachineBasicBlock *contBB = 0, *cont2BB = 0;
+ if (IsCmpxchg) {
+ contBB = MF->CreateMachineBasicBlock(LLVM_BB);
+ cont2BB = MF->CreateMachineBasicBlock(LLVM_BB);
+ }
+ MachineBasicBlock *exitMBB = MF->CreateMachineBasicBlock(LLVM_BB);
+ MF->insert(It, loopMBB);
+ if (IsCmpxchg) {
+ MF->insert(It, contBB);
+ MF->insert(It, cont2BB);
+ }
+ MF->insert(It, exitMBB);
+
+ // Transfer the remainder of BB and its successor edges to exitMBB.
+ exitMBB->splice(exitMBB->begin(), BB,
+ llvm::next(MachineBasicBlock::iterator(MI)),
+ BB->end());
+ exitMBB->transferSuccessorsAndUpdatePHIs(BB);
+
+ TargetRegisterClass *TRC =
+ isThumb2 ? ARM::tGPRRegisterClass : ARM::GPRRegisterClass;
+ unsigned storesuccess = MRI.createVirtualRegister(TRC);
+
+ // thisMBB:
+ // ...
+ // fallthrough --> loopMBB
+ BB->addSuccessor(loopMBB);
+
+ // loopMBB:
+ // ldrexd r2, r3, ptr
+ // <binopa> r0, r2, incr
+ // <binopb> r1, r3, incr
+ // strexd storesuccess, r0, r1, ptr
+ // cmp storesuccess, #0
+ // bne- loopMBB
+ // fallthrough --> exitMBB
+ //
+ // Note that the registers are explicitly specified because there is not any
+ // way to force the register allocator to allocate a register pair.
+ //
+ // FIXME: The hardcoded registers are not necessary for Thumb2, but we
+ // need to properly enforce the restriction that the two output registers
+ // for ldrexd must be different.
+ BB = loopMBB;
+ // Load
+ AddDefaultPred(BuildMI(BB, dl, TII->get(ldrOpc))
+ .addReg(ARM::R2, RegState::Define)
+ .addReg(ARM::R3, RegState::Define).addReg(ptr));
+ // Copy r2/r3 into dest. (This copy will normally be coalesced.)
+ BuildMI(BB, dl, TII->get(TargetOpcode::COPY), destlo).addReg(ARM::R2);
+ BuildMI(BB, dl, TII->get(TargetOpcode::COPY), desthi).addReg(ARM::R3);
+
+ if (IsCmpxchg) {
+ // Add early exit
+ for (unsigned i = 0; i < 2; i++) {
+ AddDefaultPred(BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2CMPrr :
+ ARM::CMPrr))
+ .addReg(i == 0 ? destlo : desthi)
+ .addReg(i == 0 ? vallo : valhi));
+ BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2Bcc : ARM::Bcc))
+ .addMBB(exitMBB).addImm(ARMCC::NE).addReg(ARM::CPSR);
+ BB->addSuccessor(exitMBB);
+ BB->addSuccessor(i == 0 ? contBB : cont2BB);
+ BB = (i == 0 ? contBB : cont2BB);
}
+
+ // Copy to physregs for strexd
+ unsigned setlo = MI->getOperand(5).getReg();
+ unsigned sethi = MI->getOperand(6).getReg();
+ BuildMI(BB, dl, TII->get(TargetOpcode::COPY), ARM::R0).addReg(setlo);
+ BuildMI(BB, dl, TII->get(TargetOpcode::COPY), ARM::R1).addReg(sethi);
+ } else if (Op1) {
+ // Perform binary operation
+ AddDefaultPred(BuildMI(BB, dl, TII->get(Op1), ARM::R0)
+ .addReg(destlo).addReg(vallo))
+ .addReg(NeedsCarry ? ARM::CPSR : 0, getDefRegState(NeedsCarry));
+ AddDefaultPred(BuildMI(BB, dl, TII->get(Op2), ARM::R1)
+ .addReg(desthi).addReg(valhi)).addReg(0);
+ } else {
+ // Copy to physregs for strexd
+ BuildMI(BB, dl, TII->get(TargetOpcode::COPY), ARM::R0).addReg(vallo);
+ BuildMI(BB, dl, TII->get(TargetOpcode::COPY), ARM::R1).addReg(valhi);
}
- if (!NewOpc)
- return false;
+ // Store
+ AddDefaultPred(BuildMI(BB, dl, TII->get(strOpc), storesuccess)
+ .addReg(ARM::R0).addReg(ARM::R1).addReg(ptr));
+ // Cmp+jump
+ AddDefaultPred(BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2CMPri : ARM::CMPri))
+ .addReg(storesuccess).addImm(0));
+ BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2Bcc : ARM::Bcc))
+ .addMBB(loopMBB).addImm(ARMCC::NE).addReg(ARM::CPSR);
+
+ BB->addSuccessor(loopMBB);
+ BB->addSuccessor(exitMBB);
+
+ // exitMBB:
+ // ...
+ BB = exitMBB;
+
+ MI->eraseFromParent(); // The instruction is gone now.
+
+ return BB;
+}
+
+/// EmitBasePointerRecalculation - For functions using a base pointer, we
+/// rematerialize it (via the frame pointer).
+void ARMTargetLowering::
+EmitBasePointerRecalculation(MachineInstr *MI, MachineBasicBlock *MBB,
+ MachineBasicBlock *DispatchBB) const {
+ const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
+ const ARMBaseInstrInfo *AII = static_cast<const ARMBaseInstrInfo*>(TII);
+ MachineFunction &MF = *MI->getParent()->getParent();
+ ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
+ const ARMBaseRegisterInfo &RI = AII->getRegisterInfo();
+
+ if (!RI.hasBasePointer(MF)) return;
+
+ MachineBasicBlock::iterator MBBI = MI;
+
+ int32_t NumBytes = AFI->getFramePtrSpillOffset();
+ unsigned FramePtr = RI.getFrameRegister(MF);
+ assert(MF.getTarget().getFrameLowering()->hasFP(MF) &&
+ "Base pointer without frame pointer?");
+
+ if (AFI->isThumb2Function())
+ llvm::emitT2RegPlusImmediate(*MBB, MBBI, MI->getDebugLoc(), ARM::R6,
+ FramePtr, -NumBytes, ARMCC::AL, 0, *AII);
+ else if (AFI->isThumbFunction())
+ llvm::emitThumbRegPlusImmediate(*MBB, MBBI, MI->getDebugLoc(), ARM::R6,
+ FramePtr, -NumBytes, *AII, RI);
+ else
+ llvm::emitARMRegPlusImmediate(*MBB, MBBI, MI->getDebugLoc(), ARM::R6,
+ FramePtr, -NumBytes, ARMCC::AL, 0, *AII);
+
+ if (!RI.needsStackRealignment(MF)) return;
+
+ // If there's dynamic realignment, adjust for it.
+ MachineFrameInfo *MFI = MF.getFrameInfo();
+ unsigned MaxAlign = MFI->getMaxAlignment();
+ assert(!AFI->isThumb1OnlyFunction());
+
+ // Emit bic r6, r6, MaxAlign
+ unsigned bicOpc = AFI->isThumbFunction() ? ARM::t2BICri : ARM::BICri;
+ AddDefaultCC(
+ AddDefaultPred(
+ BuildMI(*MBB, MBBI, MI->getDebugLoc(), TII->get(bicOpc), ARM::R6)
+ .addReg(ARM::R6, RegState::Kill)
+ .addImm(MaxAlign - 1)));
+}
+
+/// SetupEntryBlockForSjLj - Insert code into the entry block that creates and
+/// registers the function context.
+void ARMTargetLowering::
+SetupEntryBlockForSjLj(MachineInstr *MI, MachineBasicBlock *MBB,
+ MachineBasicBlock *DispatchBB, int FI) const {
const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
DebugLoc dl = MI->getDebugLoc();
- MachineInstrBuilder MIB = BuildMI(*BB, MI, dl, TII->get(NewOpc));
- for (unsigned i = 0; i < MI->getNumOperands(); ++i)
- MIB.addOperand(MI->getOperand(i));
- AddDefaultPred(MIB);
- MIB.addReg(ARM::CPSR, RegState::Define); // S bit
+ MachineFunction *MF = MBB->getParent();
+ MachineRegisterInfo *MRI = &MF->getRegInfo();
+ MachineConstantPool *MCP = MF->getConstantPool();
+ ARMFunctionInfo *AFI = MF->getInfo<ARMFunctionInfo>();
+ const Function *F = MF->getFunction();
+
+ bool isThumb = Subtarget->isThumb();
+ bool isThumb2 = Subtarget->isThumb2();
+
+ unsigned PCLabelId = AFI->createPICLabelUId();
+ unsigned PCAdj = (isThumb || isThumb2) ? 4 : 8;
+ ARMConstantPoolValue *CPV =
+ ARMConstantPoolMBB::Create(F->getContext(), DispatchBB, PCLabelId, PCAdj);
+ unsigned CPI = MCP->getConstantPoolIndex(CPV, 4);
+
+ const TargetRegisterClass *TRC =
+ isThumb ? ARM::tGPRRegisterClass : ARM::GPRRegisterClass;
+
+ // Grab constant pool and fixed stack memory operands.
+ MachineMemOperand *CPMMO =
+ MF->getMachineMemOperand(MachinePointerInfo::getConstantPool(),
+ MachineMemOperand::MOLoad, 4, 4);
+
+ MachineMemOperand *FIMMOSt =
+ MF->getMachineMemOperand(MachinePointerInfo::getFixedStack(FI),
+ MachineMemOperand::MOStore, 4, 4);
+
+ EmitBasePointerRecalculation(MI, MBB, DispatchBB);
+
+ // Load the address of the dispatch MBB into the jump buffer.
+ if (isThumb2) {
+ // Incoming value: jbuf
+ // ldr.n r5, LCPI1_1
+ // orr r5, r5, #1
+ // add r5, pc
+ // str r5, [$jbuf, #+4] ; &jbuf[1]
+ unsigned NewVReg1 = MRI->createVirtualRegister(TRC);
+ AddDefaultPred(BuildMI(*MBB, MI, dl, TII->get(ARM::t2LDRpci), NewVReg1)
+ .addConstantPoolIndex(CPI)
+ .addMemOperand(CPMMO));
+ // Set the low bit because of thumb mode.
+ unsigned NewVReg2 = MRI->createVirtualRegister(TRC);
+ AddDefaultCC(
+ AddDefaultPred(BuildMI(*MBB, MI, dl, TII->get(ARM::t2ORRri), NewVReg2)
+ .addReg(NewVReg1, RegState::Kill)
+ .addImm(0x01)));
+ unsigned NewVReg3 = MRI->createVirtualRegister(TRC);
+ BuildMI(*MBB, MI, dl, TII->get(ARM::tPICADD), NewVReg3)
+ .addReg(NewVReg2, RegState::Kill)
+ .addImm(PCLabelId);
+ AddDefaultPred(BuildMI(*MBB, MI, dl, TII->get(ARM::t2STRi12))
+ .addReg(NewVReg3, RegState::Kill)
+ .addFrameIndex(FI)
+ .addImm(36) // &jbuf[1] :: pc
+ .addMemOperand(FIMMOSt));
+ } else if (isThumb) {
+ // Incoming value: jbuf
+ // ldr.n r1, LCPI1_4
+ // add r1, pc
+ // mov r2, #1
+ // orrs r1, r2
+ // add r2, $jbuf, #+4 ; &jbuf[1]
+ // str r1, [r2]
+ unsigned NewVReg1 = MRI->createVirtualRegister(TRC);
+ AddDefaultPred(BuildMI(*MBB, MI, dl, TII->get(ARM::tLDRpci), NewVReg1)
+ .addConstantPoolIndex(CPI)
+ .addMemOperand(CPMMO));
+ unsigned NewVReg2 = MRI->createVirtualRegister(TRC);
+ BuildMI(*MBB, MI, dl, TII->get(ARM::tPICADD), NewVReg2)
+ .addReg(NewVReg1, RegState::Kill)
+ .addImm(PCLabelId);
+ // Set the low bit because of thumb mode.
+ unsigned NewVReg3 = MRI->createVirtualRegister(TRC);
+ AddDefaultPred(BuildMI(*MBB, MI, dl, TII->get(ARM::tMOVi8), NewVReg3)
+ .addReg(ARM::CPSR, RegState::Define)
+ .addImm(1));
+ unsigned NewVReg4 = MRI->createVirtualRegister(TRC);
+ AddDefaultPred(BuildMI(*MBB, MI, dl, TII->get(ARM::tORR), NewVReg4)
+ .addReg(ARM::CPSR, RegState::Define)
+ .addReg(NewVReg2, RegState::Kill)
+ .addReg(NewVReg3, RegState::Kill));
+ unsigned NewVReg5 = MRI->createVirtualRegister(TRC);
+ AddDefaultPred(BuildMI(*MBB, MI, dl, TII->get(ARM::tADDrSPi), NewVReg5)
+ .addFrameIndex(FI)
+ .addImm(36)); // &jbuf[1] :: pc
+ AddDefaultPred(BuildMI(*MBB, MI, dl, TII->get(ARM::tSTRi))
+ .addReg(NewVReg4, RegState::Kill)
+ .addReg(NewVReg5, RegState::Kill)
+ .addImm(0)
+ .addMemOperand(FIMMOSt));
+ } else {
+ // Incoming value: jbuf
+ // ldr r1, LCPI1_1
+ // add r1, pc, r1
+ // str r1, [$jbuf, #+4] ; &jbuf[1]
+ unsigned NewVReg1 = MRI->createVirtualRegister(TRC);
+ AddDefaultPred(BuildMI(*MBB, MI, dl, TII->get(ARM::LDRi12), NewVReg1)
+ .addConstantPoolIndex(CPI)
+ .addImm(0)
+ .addMemOperand(CPMMO));
+ unsigned NewVReg2 = MRI->createVirtualRegister(TRC);
+ AddDefaultPred(BuildMI(*MBB, MI, dl, TII->get(ARM::PICADD), NewVReg2)
+ .addReg(NewVReg1, RegState::Kill)
+ .addImm(PCLabelId));
+ AddDefaultPred(BuildMI(*MBB, MI, dl, TII->get(ARM::STRi12))
+ .addReg(NewVReg2, RegState::Kill)
+ .addFrameIndex(FI)
+ .addImm(36) // &jbuf[1] :: pc
+ .addMemOperand(FIMMOSt));
+ }
+}
+
+MachineBasicBlock *ARMTargetLowering::
+EmitSjLjDispatchBlock(MachineInstr *MI, MachineBasicBlock *MBB) const {
+ const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
+ DebugLoc dl = MI->getDebugLoc();
+ MachineFunction *MF = MBB->getParent();
+ MachineRegisterInfo *MRI = &MF->getRegInfo();
+ ARMFunctionInfo *AFI = MF->getInfo<ARMFunctionInfo>();
+ MachineFrameInfo *MFI = MF->getFrameInfo();
+ int FI = MFI->getFunctionContextIndex();
+
+ const TargetRegisterClass *TRC =
+ Subtarget->isThumb() ? ARM::tGPRRegisterClass : ARM::GPRRegisterClass;
+
+ // Get a mapping of the call site numbers to all of the landing pads they're
+ // associated with.
+ DenseMap<unsigned, SmallVector<MachineBasicBlock*, 2> > CallSiteNumToLPad;
+ unsigned MaxCSNum = 0;
+ MachineModuleInfo &MMI = MF->getMMI();
+ for (MachineFunction::iterator BB = MF->begin(), E = MF->end(); BB != E; ++BB) {
+ if (!BB->isLandingPad()) continue;
+
+ // FIXME: We should assert that the EH_LABEL is the first MI in the landing
+ // pad.
+ for (MachineBasicBlock::iterator
+ II = BB->begin(), IE = BB->end(); II != IE; ++II) {
+ if (!II->isEHLabel()) continue;
+
+ MCSymbol *Sym = II->getOperand(0).getMCSymbol();
+ if (!MMI.hasCallSiteLandingPad(Sym)) continue;
+
+ SmallVectorImpl<unsigned> &CallSiteIdxs = MMI.getCallSiteLandingPad(Sym);
+ for (SmallVectorImpl<unsigned>::iterator
+ CSI = CallSiteIdxs.begin(), CSE = CallSiteIdxs.end();
+ CSI != CSE; ++CSI) {
+ CallSiteNumToLPad[*CSI].push_back(BB);
+ MaxCSNum = std::max(MaxCSNum, *CSI);
+ }
+ break;
+ }
+ }
+
+ // Get an ordered list of the machine basic blocks for the jump table.
+ std::vector<MachineBasicBlock*> LPadList;
+ SmallPtrSet<MachineBasicBlock*, 64> InvokeBBs;
+ LPadList.reserve(CallSiteNumToLPad.size());
+ for (unsigned I = 1; I <= MaxCSNum; ++I) {
+ SmallVectorImpl<MachineBasicBlock*> &MBBList = CallSiteNumToLPad[I];
+ for (SmallVectorImpl<MachineBasicBlock*>::iterator
+ II = MBBList.begin(), IE = MBBList.end(); II != IE; ++II) {
+ LPadList.push_back(*II);
+ InvokeBBs.insert((*II)->pred_begin(), (*II)->pred_end());
+ }
+ }
+
+ assert(!LPadList.empty() &&
+ "No landing pad destinations for the dispatch jump table!");
+
+ // Create the jump table and associated information.
+ MachineJumpTableInfo *JTI =
+ MF->getOrCreateJumpTableInfo(MachineJumpTableInfo::EK_Inline);
+ unsigned MJTI = JTI->createJumpTableIndex(LPadList);
+ unsigned UId = AFI->createJumpTableUId();
+
+ // Create the MBBs for the dispatch code.
+
+ // Shove the dispatch's address into the return slot in the function context.
+ MachineBasicBlock *DispatchBB = MF->CreateMachineBasicBlock();
+ DispatchBB->setIsLandingPad();
+
+ MachineBasicBlock *TrapBB = MF->CreateMachineBasicBlock();
+ BuildMI(TrapBB, dl, TII->get(Subtarget->isThumb() ? ARM::tTRAP : ARM::TRAP));
+ DispatchBB->addSuccessor(TrapBB);
+
+ MachineBasicBlock *DispContBB = MF->CreateMachineBasicBlock();
+ DispatchBB->addSuccessor(DispContBB);
+
+ // Insert and renumber MBBs.
+ MachineBasicBlock *Last = &MF->back();
+ MF->insert(MF->end(), DispatchBB);
+ MF->insert(MF->end(), DispContBB);
+ MF->insert(MF->end(), TrapBB);
+ MF->RenumberBlocks(Last);
+
+ // Insert code into the entry block that creates and registers the function
+ // context.
+ SetupEntryBlockForSjLj(MI, MBB, DispatchBB, FI);
+
+ MachineMemOperand *FIMMOLd =
+ MF->getMachineMemOperand(MachinePointerInfo::getFixedStack(FI),
+ MachineMemOperand::MOLoad |
+ MachineMemOperand::MOVolatile, 4, 4);
+
+ if (Subtarget->isThumb2()) {
+ unsigned NewVReg1 = MRI->createVirtualRegister(TRC);
+ AddDefaultPred(BuildMI(DispatchBB, dl, TII->get(ARM::t2LDRi12), NewVReg1)
+ .addFrameIndex(FI)
+ .addImm(4)
+ .addMemOperand(FIMMOLd));
+ AddDefaultPred(BuildMI(DispatchBB, dl, TII->get(ARM::t2CMPri))
+ .addReg(NewVReg1)
+ .addImm(LPadList.size()));
+ BuildMI(DispatchBB, dl, TII->get(ARM::t2Bcc))
+ .addMBB(TrapBB)
+ .addImm(ARMCC::HI)
+ .addReg(ARM::CPSR);
+
+ unsigned NewVReg2 = MRI->createVirtualRegister(TRC);
+ AddDefaultPred(BuildMI(DispContBB, dl, TII->get(ARM::t2LEApcrelJT),NewVReg2)
+ .addJumpTableIndex(MJTI)
+ .addImm(UId));
+
+ unsigned NewVReg3 = MRI->createVirtualRegister(TRC);
+ AddDefaultCC(
+ AddDefaultPred(
+ BuildMI(DispContBB, dl, TII->get(ARM::t2ADDrs), NewVReg3)
+ .addReg(NewVReg2, RegState::Kill)
+ .addReg(NewVReg1)
+ .addImm(ARM_AM::getSORegOpc(ARM_AM::lsl, 2))));
+
+ BuildMI(DispContBB, dl, TII->get(ARM::t2BR_JT))
+ .addReg(NewVReg3, RegState::Kill)
+ .addReg(NewVReg1)
+ .addJumpTableIndex(MJTI)
+ .addImm(UId);
+ } else if (Subtarget->isThumb()) {
+ unsigned NewVReg1 = MRI->createVirtualRegister(TRC);
+ AddDefaultPred(BuildMI(DispatchBB, dl, TII->get(ARM::tLDRspi), NewVReg1)
+ .addFrameIndex(FI)
+ .addImm(1)
+ .addMemOperand(FIMMOLd));
+
+ AddDefaultPred(BuildMI(DispatchBB, dl, TII->get(ARM::tCMPi8))
+ .addReg(NewVReg1)
+ .addImm(LPadList.size()));
+ BuildMI(DispatchBB, dl, TII->get(ARM::tBcc))
+ .addMBB(TrapBB)
+ .addImm(ARMCC::HI)
+ .addReg(ARM::CPSR);
+
+ unsigned NewVReg2 = MRI->createVirtualRegister(TRC);
+ AddDefaultPred(BuildMI(DispContBB, dl, TII->get(ARM::tLSLri), NewVReg2)
+ .addReg(ARM::CPSR, RegState::Define)
+ .addReg(NewVReg1)
+ .addImm(2));
+
+ unsigned NewVReg3 = MRI->createVirtualRegister(TRC);
+ AddDefaultPred(BuildMI(DispContBB, dl, TII->get(ARM::tLEApcrelJT), NewVReg3)
+ .addJumpTableIndex(MJTI)
+ .addImm(UId));
+
+ unsigned NewVReg4 = MRI->createVirtualRegister(TRC);
+ AddDefaultPred(BuildMI(DispContBB, dl, TII->get(ARM::tADDrr), NewVReg4)
+ .addReg(ARM::CPSR, RegState::Define)
+ .addReg(NewVReg2, RegState::Kill)
+ .addReg(NewVReg3));
+
+ MachineMemOperand *JTMMOLd =
+ MF->getMachineMemOperand(MachinePointerInfo::getJumpTable(),
+ MachineMemOperand::MOLoad, 4, 4);
+
+ unsigned NewVReg5 = MRI->createVirtualRegister(TRC);
+ AddDefaultPred(BuildMI(DispContBB, dl, TII->get(ARM::tLDRi), NewVReg5)
+ .addReg(NewVReg4, RegState::Kill)
+ .addImm(0)
+ .addMemOperand(JTMMOLd));
+
+ unsigned NewVReg6 = MRI->createVirtualRegister(TRC);
+ AddDefaultPred(BuildMI(DispContBB, dl, TII->get(ARM::tADDrr), NewVReg6)
+ .addReg(ARM::CPSR, RegState::Define)
+ .addReg(NewVReg5, RegState::Kill)
+ .addReg(NewVReg3));
+
+ BuildMI(DispContBB, dl, TII->get(ARM::tBR_JTr))
+ .addReg(NewVReg6, RegState::Kill)
+ .addJumpTableIndex(MJTI)
+ .addImm(UId);
+ } else {
+ unsigned NewVReg1 = MRI->createVirtualRegister(TRC);
+ AddDefaultPred(BuildMI(DispatchBB, dl, TII->get(ARM::LDRi12), NewVReg1)
+ .addFrameIndex(FI)
+ .addImm(4)
+ .addMemOperand(FIMMOLd));
+ AddDefaultPred(BuildMI(DispatchBB, dl, TII->get(ARM::CMPri))
+ .addReg(NewVReg1)
+ .addImm(LPadList.size()));
+ BuildMI(DispatchBB, dl, TII->get(ARM::Bcc))
+ .addMBB(TrapBB)
+ .addImm(ARMCC::HI)
+ .addReg(ARM::CPSR);
+
+ unsigned NewVReg2 = MRI->createVirtualRegister(TRC);
+ AddDefaultCC(
+ AddDefaultPred(BuildMI(DispContBB, dl, TII->get(ARM::MOVsi), NewVReg2)
+ .addReg(NewVReg1)
+ .addImm(ARM_AM::getSORegOpc(ARM_AM::lsl, 2))));
+ unsigned NewVReg3 = MRI->createVirtualRegister(TRC);
+ AddDefaultPred(BuildMI(DispContBB, dl, TII->get(ARM::LEApcrelJT), NewVReg3)
+ .addJumpTableIndex(MJTI)
+ .addImm(UId));
+
+ MachineMemOperand *JTMMOLd =
+ MF->getMachineMemOperand(MachinePointerInfo::getJumpTable(),
+ MachineMemOperand::MOLoad, 4, 4);
+ unsigned NewVReg4 = MRI->createVirtualRegister(TRC);
+ AddDefaultPred(
+ BuildMI(DispContBB, dl, TII->get(ARM::LDRrs), NewVReg4)
+ .addReg(NewVReg2, RegState::Kill)
+ .addReg(NewVReg3)
+ .addImm(0)
+ .addMemOperand(JTMMOLd));
+
+ BuildMI(DispContBB, dl, TII->get(ARM::BR_JTadd))
+ .addReg(NewVReg4, RegState::Kill)
+ .addReg(NewVReg3)
+ .addJumpTableIndex(MJTI)
+ .addImm(UId);
+ }
+
+ // Add the jump table entries as successors to the MBB.
+ MachineBasicBlock *PrevMBB = 0;
+ for (std::vector<MachineBasicBlock*>::iterator
+ I = LPadList.begin(), E = LPadList.end(); I != E; ++I) {
+ MachineBasicBlock *CurMBB = *I;
+ if (PrevMBB != CurMBB)
+ DispContBB->addSuccessor(CurMBB);
+ PrevMBB = CurMBB;
+ }
+
+ const ARMBaseInstrInfo *AII = static_cast<const ARMBaseInstrInfo*>(TII);
+ const ARMBaseRegisterInfo &RI = AII->getRegisterInfo();
+ const unsigned *SavedRegs = RI.getCalleeSavedRegs(MF);
+ for (SmallPtrSet<MachineBasicBlock*, 64>::iterator
+ I = InvokeBBs.begin(), E = InvokeBBs.end(); I != E; ++I) {
+ MachineBasicBlock *BB = *I;
+
+ // Remove the landing pad successor from the invoke block and replace it
+ // with the new dispatch block.
+ for (MachineBasicBlock::succ_iterator
+ SI = BB->succ_begin(), SE = BB->succ_end(); SI != SE; ++SI) {
+ MachineBasicBlock *SMBB = *SI;
+ if (SMBB->isLandingPad()) {
+ BB->removeSuccessor(SMBB);
+ SMBB->setIsLandingPad(false);
+ }
+ }
+
+ BB->addSuccessor(DispatchBB);
+
+ // Find the invoke call and mark all of the callee-saved registers as
+ // 'implicit defined' so that they're spilled. This prevents code from
+ // moving instructions to before the EH block, where they will never be
+ // executed.
+ for (MachineBasicBlock::reverse_iterator
+ II = BB->rbegin(), IE = BB->rend(); II != IE; ++II) {
+ if (!II->getDesc().isCall()) continue;
+
+ DenseMap<unsigned, bool> DefRegs;
+ for (MachineInstr::mop_iterator
+ OI = II->operands_begin(), OE = II->operands_end();
+ OI != OE; ++OI) {
+ if (!OI->isReg()) continue;
+ DefRegs[OI->getReg()] = true;
+ }
+
+ MachineInstrBuilder MIB(&*II);
+
+ for (unsigned i = 0; SavedRegs[i] != 0; ++i) {
+ if (!TRC->contains(SavedRegs[i])) continue;
+ if (!DefRegs[SavedRegs[i]])
+ MIB.addReg(SavedRegs[i], RegState::ImplicitDefine | RegState::Dead);
+ }
+
+ break;
+ }
+ }
+
+ // The instruction is gone now.
MI->eraseFromParent();
- return true;
+
+ return MBB;
+}
+
+static
+MachineBasicBlock *OtherSucc(MachineBasicBlock *MBB, MachineBasicBlock *Succ) {
+ for (MachineBasicBlock::succ_iterator I = MBB->succ_begin(),
+ E = MBB->succ_end(); I != E; ++I)
+ if (*I != Succ)
+ return *I;
+ llvm_unreachable("Expecting a BB with two successors!");
}
MachineBasicBlock *
@@ -5286,12 +5962,61 @@ ARMTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
bool isThumb2 = Subtarget->isThumb2();
switch (MI->getOpcode()) {
default: {
- if (RemapAddSubWithFlags(MI, BB))
- return BB;
-
MI->dump();
llvm_unreachable("Unexpected instr type to insert");
}
+ // The Thumb2 pre-indexed stores have the same MI operands, they just
+ // define them differently in the .td files from the isel patterns, so
+ // they need pseudos.
+ case ARM::t2STR_preidx:
+ MI->setDesc(TII->get(ARM::t2STR_PRE));
+ return BB;
+ case ARM::t2STRB_preidx:
+ MI->setDesc(TII->get(ARM::t2STRB_PRE));
+ return BB;
+ case ARM::t2STRH_preidx:
+ MI->setDesc(TII->get(ARM::t2STRH_PRE));
+ return BB;
+
+ case ARM::STRi_preidx:
+ case ARM::STRBi_preidx: {
+ unsigned NewOpc = MI->getOpcode() == ARM::STRi_preidx ?
+ ARM::STR_PRE_IMM : ARM::STRB_PRE_IMM;
+ // Decode the offset.
+ unsigned Offset = MI->getOperand(4).getImm();
+ bool isSub = ARM_AM::getAM2Op(Offset) == ARM_AM::sub;
+ Offset = ARM_AM::getAM2Offset(Offset);
+ if (isSub)
+ Offset = -Offset;
+
+ MachineMemOperand *MMO = *MI->memoperands_begin();
+ BuildMI(*BB, MI, dl, TII->get(NewOpc))
+ .addOperand(MI->getOperand(0)) // Rn_wb
+ .addOperand(MI->getOperand(1)) // Rt
+ .addOperand(MI->getOperand(2)) // Rn
+ .addImm(Offset) // offset (skip GPR==zero_reg)
+ .addOperand(MI->getOperand(5)) // pred
+ .addOperand(MI->getOperand(6))
+ .addMemOperand(MMO);
+ MI->eraseFromParent();
+ return BB;
+ }
+ case ARM::STRr_preidx:
+ case ARM::STRBr_preidx:
+ case ARM::STRH_preidx: {
+ unsigned NewOpc;
+ switch (MI->getOpcode()) {
+ default: llvm_unreachable("unexpected opcode!");
+ case ARM::STRr_preidx: NewOpc = ARM::STR_PRE_REG; break;
+ case ARM::STRBr_preidx: NewOpc = ARM::STRB_PRE_REG; break;
+ case ARM::STRH_preidx: NewOpc = ARM::STRH_PRE; break;
+ }
+ MachineInstrBuilder MIB = BuildMI(*BB, MI, dl, TII->get(NewOpc));
+ for (unsigned i = 0; i < MI->getNumOperands(); ++i)
+ MIB.addOperand(MI->getOperand(i));
+ MI->eraseFromParent();
+ return BB;
+ }
case ARM::ATOMIC_LOAD_ADD_I8:
return EmitAtomicBinary(MI, BB, 1, isThumb2 ? ARM::t2ADDrr : ARM::ADDrr);
case ARM::ATOMIC_LOAD_ADD_I16:
@@ -5370,6 +6095,31 @@ ARMTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
case ARM::ATOMIC_CMP_SWAP_I16: return EmitAtomicCmpSwap(MI, BB, 2);
case ARM::ATOMIC_CMP_SWAP_I32: return EmitAtomicCmpSwap(MI, BB, 4);
+
+ case ARM::ATOMADD6432:
+ return EmitAtomicBinary64(MI, BB, isThumb2 ? ARM::t2ADDrr : ARM::ADDrr,
+ isThumb2 ? ARM::t2ADCrr : ARM::ADCrr,
+ /*NeedsCarry*/ true);
+ case ARM::ATOMSUB6432:
+ return EmitAtomicBinary64(MI, BB, isThumb2 ? ARM::t2SUBrr : ARM::SUBrr,
+ isThumb2 ? ARM::t2SBCrr : ARM::SBCrr,
+ /*NeedsCarry*/ true);
+ case ARM::ATOMOR6432:
+ return EmitAtomicBinary64(MI, BB, isThumb2 ? ARM::t2ORRrr : ARM::ORRrr,
+ isThumb2 ? ARM::t2ORRrr : ARM::ORRrr);
+ case ARM::ATOMXOR6432:
+ return EmitAtomicBinary64(MI, BB, isThumb2 ? ARM::t2EORrr : ARM::EORrr,
+ isThumb2 ? ARM::t2EORrr : ARM::EORrr);
+ case ARM::ATOMAND6432:
+ return EmitAtomicBinary64(MI, BB, isThumb2 ? ARM::t2ANDrr : ARM::ANDrr,
+ isThumb2 ? ARM::t2ANDrr : ARM::ANDrr);
+ case ARM::ATOMSWAP6432:
+ return EmitAtomicBinary64(MI, BB, 0, 0, false);
+ case ARM::ATOMCMPXCHG6432:
+ return EmitAtomicBinary64(MI, BB, isThumb2 ? ARM::t2SUBrr : ARM::SUBrr,
+ isThumb2 ? ARM::t2SBCrr : ARM::SBCrr,
+ /*NeedsCarry*/ false, /*IsCmpxchg*/true);
+
case ARM::tMOVCCr_pseudo: {
// To "insert" a SELECT_CC instruction, we actually have to insert the
// diamond control-flow pattern. The incoming instruction knows the
@@ -5461,13 +6211,159 @@ ARMTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2Bcc : ARM::Bcc))
.addMBB(destMBB).addImm(ARMCC::EQ).addReg(ARM::CPSR);
- BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2B : ARM::B))
- .addMBB(exitMBB);
+ if (isThumb2)
+ AddDefaultPred(BuildMI(BB, dl, TII->get(ARM::t2B)).addMBB(exitMBB));
+ else
+ BuildMI(BB, dl, TII->get(ARM::B)) .addMBB(exitMBB);
MI->eraseFromParent(); // The pseudo instruction is gone now.
return BB;
}
+
+ case ARM::ABS:
+ case ARM::t2ABS: {
+ // To insert an ABS instruction, we have to insert the
+ // diamond control-flow pattern. The incoming instruction knows the
+ // source vreg to test against 0, the destination vreg to set,
+ // the condition code register to branch on, the
+ // true/false values to select between, and a branch opcode to use.
+ // It transforms
+ // V1 = ABS V0
+ // into
+ // V2 = MOVS V0
+ // BCC (branch to SinkBB if V0 >= 0)
+ // RSBBB: V3 = RSBri V2, 0 (compute ABS if V2 < 0)
+ // SinkBB: V1 = PHI(V2, V3)
+ const BasicBlock *LLVM_BB = BB->getBasicBlock();
+ MachineFunction::iterator BBI = BB;
+ ++BBI;
+ MachineFunction *Fn = BB->getParent();
+ MachineBasicBlock *RSBBB = Fn->CreateMachineBasicBlock(LLVM_BB);
+ MachineBasicBlock *SinkBB = Fn->CreateMachineBasicBlock(LLVM_BB);
+ Fn->insert(BBI, RSBBB);
+ Fn->insert(BBI, SinkBB);
+
+ unsigned int ABSSrcReg = MI->getOperand(1).getReg();
+ unsigned int ABSDstReg = MI->getOperand(0).getReg();
+ bool isThumb2 = Subtarget->isThumb2();
+ MachineRegisterInfo &MRI = Fn->getRegInfo();
+ // In Thumb mode S must not be specified if source register is the SP or
+ // PC and if destination register is the SP, so restrict register class
+ unsigned NewMovDstReg = MRI.createVirtualRegister(
+ isThumb2 ? ARM::rGPRRegisterClass : ARM::GPRRegisterClass);
+ unsigned NewRsbDstReg = MRI.createVirtualRegister(
+ isThumb2 ? ARM::rGPRRegisterClass : ARM::GPRRegisterClass);
+
+ // Transfer the remainder of BB and its successor edges to sinkMBB.
+ SinkBB->splice(SinkBB->begin(), BB,
+ llvm::next(MachineBasicBlock::iterator(MI)),
+ BB->end());
+ SinkBB->transferSuccessorsAndUpdatePHIs(BB);
+
+ BB->addSuccessor(RSBBB);
+ BB->addSuccessor(SinkBB);
+
+ // fall through to SinkMBB
+ RSBBB->addSuccessor(SinkBB);
+
+ // insert a movs at the end of BB
+ BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2MOVr : ARM::MOVr),
+ NewMovDstReg)
+ .addReg(ABSSrcReg, RegState::Kill)
+ .addImm((unsigned)ARMCC::AL).addReg(0)
+ .addReg(ARM::CPSR, RegState::Define);
+
+ // insert a bcc with opposite CC to ARMCC::MI at the end of BB
+ BuildMI(BB, dl,
+ TII->get(isThumb2 ? ARM::t2Bcc : ARM::Bcc)).addMBB(SinkBB)
+ .addImm(ARMCC::getOppositeCondition(ARMCC::MI)).addReg(ARM::CPSR);
+
+ // insert rsbri in RSBBB
+ // Note: BCC and rsbri will be converted into predicated rsbmi
+ // by if-conversion pass
+ BuildMI(*RSBBB, RSBBB->begin(), dl,
+ TII->get(isThumb2 ? ARM::t2RSBri : ARM::RSBri), NewRsbDstReg)
+ .addReg(NewMovDstReg, RegState::Kill)
+ .addImm(0).addImm((unsigned)ARMCC::AL).addReg(0).addReg(0);
+
+ // insert PHI in SinkBB,
+ // reuse ABSDstReg to not change uses of ABS instruction
+ BuildMI(*SinkBB, SinkBB->begin(), dl,
+ TII->get(ARM::PHI), ABSDstReg)
+ .addReg(NewRsbDstReg).addMBB(RSBBB)
+ .addReg(NewMovDstReg).addMBB(BB);
+
+ // remove ABS instruction
+ MI->eraseFromParent();
+
+ // return last added BB
+ return SinkBB;
+ }
+ }
+}
+
+void ARMTargetLowering::AdjustInstrPostInstrSelection(MachineInstr *MI,
+ SDNode *Node) const {
+ const MCInstrDesc &MCID = MI->getDesc();
+ if (!MCID.hasPostISelHook()) {
+ assert(!convertAddSubFlagsOpcode(MI->getOpcode()) &&
+ "Pseudo flag-setting opcodes must be marked with 'hasPostISelHook'");
+ return;
+ }
+
+ // Adjust potentially 's' setting instructions after isel, i.e. ADC, SBC, RSB,
+ // RSC. Coming out of isel, they have an implicit CPSR def, but the optional
+ // operand is still set to noreg. If needed, set the optional operand's
+ // register to CPSR, and remove the redundant implicit def.
+ //
+ // e.g. ADCS (...opt:%noreg, CPSR<imp-def>) -> ADC (... opt:CPSR<def>).
+
+ // Rename pseudo opcodes.
+ unsigned NewOpc = convertAddSubFlagsOpcode(MI->getOpcode());
+ if (NewOpc) {
+ const ARMBaseInstrInfo *TII =
+ static_cast<const ARMBaseInstrInfo*>(getTargetMachine().getInstrInfo());
+ MI->setDesc(TII->get(NewOpc));
+ }
+ unsigned ccOutIdx = MCID.getNumOperands() - 1;
+
+ // Any ARM instruction that sets the 's' bit should specify an optional
+ // "cc_out" operand in the last operand position.
+ if (!MCID.hasOptionalDef() || !MCID.OpInfo[ccOutIdx].isOptionalDef()) {
+ assert(!NewOpc && "Optional cc_out operand required");
+ return;
+ }
+ // Look for an implicit def of CPSR added by MachineInstr ctor. Remove it
+ // since we already have an optional CPSR def.
+ bool definesCPSR = false;
+ bool deadCPSR = false;
+ for (unsigned i = MCID.getNumOperands(), e = MI->getNumOperands();
+ i != e; ++i) {
+ const MachineOperand &MO = MI->getOperand(i);
+ if (MO.isReg() && MO.isDef() && MO.getReg() == ARM::CPSR) {
+ definesCPSR = true;
+ if (MO.isDead())
+ deadCPSR = true;
+ MI->RemoveOperand(i);
+ break;
+ }
+ }
+ if (!definesCPSR) {
+ assert(!NewOpc && "Optional cc_out operand required");
+ return;
+ }
+ assert(deadCPSR == !Node->hasAnyUseOfValue(1) && "inconsistent dead flag");
+ if (deadCPSR) {
+ assert(!MI->getOperand(ccOutIdx).getReg() &&
+ "expect uninitialized optional cc_out operand");
+ return;
}
+
+ // If this instruction was defined with an optional CPSR def and its dag node
+ // had a live implicit CPSR def, then activate the optional CPSR def.
+ MachineOperand &MO = MI->getOperand(ccOutIdx);
+ MO.setReg(ARM::CPSR);
+ MO.setIsDef(true);
}
//===----------------------------------------------------------------------===//
@@ -6975,7 +7871,8 @@ ARMTargetLowering::PerformCMOVCombine(SDNode *N, SelectionDAG &DAG) const {
SDValue FalseVal = N->getOperand(0);
SDValue TrueVal = N->getOperand(1);
SDValue ARMcc = N->getOperand(2);
- ARMCC::CondCodes CC = (ARMCC::CondCodes)cast<ConstantSDNode>(ARMcc)->getZExtValue();
+ ARMCC::CondCodes CC =
+ (ARMCC::CondCodes)cast<ConstantSDNode>(ARMcc)->getZExtValue();
// Simplify
// mov r1, r0
@@ -6995,7 +7892,7 @@ ARMTargetLowering::PerformCMOVCombine(SDNode *N, SelectionDAG &DAG) const {
// movne r0, y
/// FIXME: Turn this into a target neutral optimization?
SDValue Res;
- if (CC == ARMCC::NE && FalseVal == RHS) {
+ if (CC == ARMCC::NE && FalseVal == RHS && FalseVal != LHS) {
Res = DAG.getNode(ARMISD::CMOV, dl, VT, LHS, TrueVal, ARMcc,
N->getOperand(3), Cmp);
} else if (CC == ARMCC::EQ && TrueVal == RHS) {
@@ -7235,7 +8132,7 @@ bool ARMTargetLowering::isLegalT2ScaledAddressingMode(const AddrMode &AM,
/// isLegalAddressingMode - Return true if the addressing mode represented
/// by AM is legal for this target, for a load/store of the specified type.
bool ARMTargetLowering::isLegalAddressingMode(const AddrMode &AM,
- const Type *Ty) const {
+ Type *Ty) const {
EVT VT = getValueType(Ty, true);
if (!isLegalAddressImmediate(AM.BaseOffs, VT, Subtarget))
return false;
@@ -7351,7 +8248,8 @@ static bool getARMIndexedAddressParts(SDNode *Ptr, EVT VT,
if (Ptr->getOpcode() == ISD::ADD) {
isInc = true;
- ARM_AM::ShiftOpc ShOpcVal= ARM_AM::getShiftOpcForNode(Ptr->getOperand(0));
+ ARM_AM::ShiftOpc ShOpcVal=
+ ARM_AM::getShiftOpcForNode(Ptr->getOperand(0).getOpcode());
if (ShOpcVal != ARM_AM::no_shift) {
Base = Ptr->getOperand(1);
Offset = Ptr->getOperand(0);
@@ -7536,7 +8434,7 @@ bool ARMTargetLowering::ExpandInlineAsm(CallInst *CI) const {
if (AsmPieces.size() == 3 &&
AsmPieces[0] == "rev" && AsmPieces[1] == "$0" && AsmPieces[2] == "$1" &&
IA->getConstraintString().compare(0, 4, "=l,l") == 0) {
- const IntegerType *Ty = dyn_cast<IntegerType>(CI->getType());
+ IntegerType *Ty = dyn_cast<IntegerType>(CI->getType());
if (Ty && Ty->getBitWidth() == 32)
return IntrinsicLowering::LowerToByteSwap(CI);
}
@@ -7559,6 +8457,9 @@ ARMTargetLowering::getConstraintType(const std::string &Constraint) const {
case 'x': return C_RegisterClass;
case 't': return C_RegisterClass;
case 'j': return C_Other; // Constant for movw.
+ // An address with a single base register. Due to the way we
+ // currently handle addresses it is the same as an 'r' memory constraint.
+ case 'Q': return C_Memory;
}
} else if (Constraint.size() == 2) {
switch (Constraint[0]) {
@@ -7582,7 +8483,7 @@ ARMTargetLowering::getSingleConstraintMatchWeight(
// but allow it at the lowest weight.
if (CallOperandVal == NULL)
return CW_Default;
- const Type *type = CallOperandVal->getType();
+ Type *type = CallOperandVal->getType();
// Look at the constraint type.
switch (*constraint) {
default:
@@ -7618,7 +8519,7 @@ ARMTargetLowering::getRegForInlineAsmConstraint(const std::string &Constraint,
return RCPair(0U, ARM::GPRRegisterClass);
case 'h': // High regs or no regs.
if (Subtarget->isThumb())
- return RCPair(0U, ARM::hGPRRegisterClass);
+ return RCPair(0U, ARM::hGPRRegisterClass);
break;
case 'r':
return RCPair(0U, ARM::GPRRegisterClass);
@@ -7632,15 +8533,15 @@ ARMTargetLowering::getRegForInlineAsmConstraint(const std::string &Constraint,
break;
case 'x':
if (VT == MVT::f32)
- return RCPair(0U, ARM::SPR_8RegisterClass);
+ return RCPair(0U, ARM::SPR_8RegisterClass);
if (VT.getSizeInBits() == 64)
- return RCPair(0U, ARM::DPR_8RegisterClass);
+ return RCPair(0U, ARM::DPR_8RegisterClass);
if (VT.getSizeInBits() == 128)
- return RCPair(0U, ARM::QPR_8RegisterClass);
+ return RCPair(0U, ARM::QPR_8RegisterClass);
break;
case 't':
if (VT == MVT::f32)
- return RCPair(0U, ARM::SPRRegisterClass);
+ return RCPair(0U, ARM::SPRRegisterClass);
break;
}
}
@@ -7680,12 +8581,12 @@ void ARMTargetLowering::LowerAsmOperandForConstraint(SDValue Op,
switch (ConstraintLetter) {
case 'j':
- // Constant suitable for movw, must be between 0 and
- // 65535.
- if (Subtarget->hasV6T2Ops())
- if (CVal >= 0 && CVal <= 65535)
- break;
- return;
+ // Constant suitable for movw, must be between 0 and
+ // 65535.
+ if (Subtarget->hasV6T2Ops())
+ if (CVal >= 0 && CVal <= 65535)
+ break;
+ return;
case 'I':
if (Subtarget->isThumb1Only()) {
// This must be a constant between 0 and 255, for ADD
@@ -7823,50 +8724,6 @@ ARMTargetLowering::isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const {
return false;
}
-int ARM::getVFPf32Imm(const APFloat &FPImm) {
- APInt Imm = FPImm.bitcastToAPInt();
- uint32_t Sign = Imm.lshr(31).getZExtValue() & 1;
- int32_t Exp = (Imm.lshr(23).getSExtValue() & 0xff) - 127; // -126 to 127
- int64_t Mantissa = Imm.getZExtValue() & 0x7fffff; // 23 bits
-
- // We can handle 4 bits of mantissa.
- // mantissa = (16+UInt(e:f:g:h))/16.
- if (Mantissa & 0x7ffff)
- return -1;
- Mantissa >>= 19;
- if ((Mantissa & 0xf) != Mantissa)
- return -1;
-
- // We can handle 3 bits of exponent: exp == UInt(NOT(b):c:d)-3
- if (Exp < -3 || Exp > 4)
- return -1;
- Exp = ((Exp+3) & 0x7) ^ 4;
-
- return ((int)Sign << 7) | (Exp << 4) | Mantissa;
-}
-
-int ARM::getVFPf64Imm(const APFloat &FPImm) {
- APInt Imm = FPImm.bitcastToAPInt();
- uint64_t Sign = Imm.lshr(63).getZExtValue() & 1;
- int64_t Exp = (Imm.lshr(52).getSExtValue() & 0x7ff) - 1023; // -1022 to 1023
- uint64_t Mantissa = Imm.getZExtValue() & 0xfffffffffffffLL;
-
- // We can handle 4 bits of mantissa.
- // mantissa = (16+UInt(e:f:g:h))/16.
- if (Mantissa & 0xffffffffffffLL)
- return -1;
- Mantissa >>= 48;
- if ((Mantissa & 0xf) != Mantissa)
- return -1;
-
- // We can handle 3 bits of exponent: exp == UInt(NOT(b):c:d)-3
- if (Exp < -3 || Exp > 4)
- return -1;
- Exp = ((Exp+3) & 0x7) ^ 4;
-
- return ((int)Sign << 7) | (Exp << 4) | Mantissa;
-}
-
bool ARM::isBitFieldInvertedMask(unsigned v) {
if (v == 0xffffffff)
return 0;
@@ -7889,9 +8746,9 @@ bool ARMTargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT) const {
if (!Subtarget->hasVFP3())
return false;
if (VT == MVT::f32)
- return ARM::getVFPf32Imm(Imm) != -1;
+ return ARM_AM::getFP32Imm(Imm) != -1;
if (VT == MVT::f64)
- return ARM::getVFPf64Imm(Imm) != -1;
+ return ARM_AM::getFP64Imm(Imm) != -1;
return false;
}
@@ -7933,7 +8790,7 @@ bool ARMTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
// Conservatively set memVT to the entire set of vectors stored.
unsigned NumElts = 0;
for (unsigned ArgI = 1, ArgE = I.getNumArgOperands(); ArgI < ArgE; ++ArgI) {
- const Type *ArgTy = I.getArgOperand(ArgI)->getType();
+ Type *ArgTy = I.getArgOperand(ArgI)->getType();
if (!ArgTy->isVectorTy())
break;
NumElts += getTargetData()->getTypeAllocSize(ArgTy) / 8;
diff --git a/contrib/llvm/lib/Target/ARM/ARMISelLowering.h b/contrib/llvm/lib/Target/ARM/ARMISelLowering.h
index 980fb40..5da9b27 100644
--- a/contrib/llvm/lib/Target/ARM/ARMISelLowering.h
+++ b/contrib/llvm/lib/Target/ARM/ARMISelLowering.h
@@ -71,6 +71,11 @@ namespace llvm {
SRA_FLAG, // V,Flag = sra_flag X -> sra X, 1 + save carry out.
RRX, // V = RRX X, Flag -> srl X, 1 + shift in carry flag.
+ ADDC, // Add with carry
+ ADDE, // Add using carry
+ SUBC, // Sub with carry
+ SUBE, // Sub using carry
+
VMOVRRD, // double to two gprs.
VMOVDRR, // Two gprs to double.
@@ -206,18 +211,22 @@ namespace llvm {
VST4_UPD,
VST2LN_UPD,
VST3LN_UPD,
- VST4LN_UPD
+ VST4LN_UPD,
+
+ // 64-bit atomic ops (value split into two registers)
+ ATOMADD64_DAG,
+ ATOMSUB64_DAG,
+ ATOMOR64_DAG,
+ ATOMXOR64_DAG,
+ ATOMAND64_DAG,
+ ATOMNAND64_DAG,
+ ATOMSWAP64_DAG,
+ ATOMCMPXCHG64_DAG
};
}
/// Define some predicates that are used for node matching.
namespace ARM {
- /// getVFPf32Imm / getVFPf64Imm - If the given fp immediate can be
- /// materialized with a VMOV.f32 / VMOV.f64 (i.e. fconsts / fconstd)
- /// instruction, returns its 8-bit integer representation. Otherwise,
- /// returns -1.
- int getVFPf32Imm(const APFloat &FPImm);
- int getVFPf64Imm(const APFloat &FPImm);
bool isBitFieldInvertedMask(unsigned v);
}
@@ -240,10 +249,16 @@ namespace llvm {
virtual const char *getTargetNodeName(unsigned Opcode) const;
+ /// getSetCCResultType - Return the value type to use for ISD::SETCC.
+ virtual EVT getSetCCResultType(EVT VT) const;
+
virtual MachineBasicBlock *
EmitInstrWithCustomInserter(MachineInstr *MI,
MachineBasicBlock *MBB) const;
+ virtual void
+ AdjustInstrPostInstrSelection(MachineInstr *MI, SDNode *Node) const;
+
SDValue PerformCMOVCombine(SDNode *N, SelectionDAG &DAG) const;
virtual SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const;
@@ -256,7 +271,7 @@ namespace llvm {
/// isLegalAddressingMode - Return true if the addressing mode represented
/// by AM is legal for this target, for a load/store of the specified type.
- virtual bool isLegalAddressingMode(const AddrMode &AM, const Type *Ty)const;
+ virtual bool isLegalAddressingMode(const AddrMode &AM, Type *Ty)const;
bool isLegalT2ScaledAddressingMode(const AddrMode &AM, EVT VT) const;
/// isLegalICmpImmediate - Return true if the specified immediate is legal
@@ -485,12 +500,28 @@ namespace llvm {
MachineBasicBlock *BB,
unsigned Size,
unsigned BinOpcode) const;
+ MachineBasicBlock *EmitAtomicBinary64(MachineInstr *MI,
+ MachineBasicBlock *BB,
+ unsigned Op1,
+ unsigned Op2,
+ bool NeedsCarry = false,
+ bool IsCmpxchg = false) const;
MachineBasicBlock * EmitAtomicBinaryMinMax(MachineInstr *MI,
MachineBasicBlock *BB,
unsigned Size,
bool signExtend,
ARMCC::CondCodes Cond) const;
+ void EmitBasePointerRecalculation(MachineInstr *MI, MachineBasicBlock *MBB,
+ MachineBasicBlock *DispatchBB) const;
+
+ void SetupEntryBlockForSjLj(MachineInstr *MI,
+ MachineBasicBlock *MBB,
+ MachineBasicBlock *DispatchBB, int FI) const;
+
+ MachineBasicBlock *EmitSjLjDispatchBlock(MachineInstr *MI,
+ MachineBasicBlock *MBB) const;
+
bool RemapAddSubWithFlags(MachineInstr *MI, MachineBasicBlock *BB) const;
};
diff --git a/contrib/llvm/lib/Target/ARM/ARMInstrFormats.td b/contrib/llvm/lib/Target/ARM/ARMInstrFormats.td
index 3ccf22f..7cbc911 100644
--- a/contrib/llvm/lib/Target/ARM/ARMInstrFormats.td
+++ b/contrib/llvm/lib/Target/ARM/ARMInstrFormats.td
@@ -25,7 +25,7 @@ def BrFrm : Format<2>;
def BrMiscFrm : Format<3>;
def DPFrm : Format<4>;
-def DPSoRegFrm : Format<5>;
+def DPSoRegRegFrm : Format<5>;
def LdFrm : Format<6>;
def StFrm : Format<7>;
@@ -68,6 +68,7 @@ def N3RegVShFrm : Format<38>;
def NVExtFrm : Format<39>;
def NVMulSLFrm : Format<40>;
def NVTBLFrm : Format<41>;
+def DPSoRegImmFrm : Format<42>;
// Misc flags.
@@ -130,39 +131,15 @@ def VFPNeonA8Domain : Domain<5>; // Instructions in VFP & Neon under A8
// ARM special operands.
//
-def CondCodeOperand : AsmOperandClass {
- let Name = "CondCode";
- let SuperClasses = [];
-}
-
-def CCOutOperand : AsmOperandClass {
- let Name = "CCOut";
- let SuperClasses = [];
-}
-
-def MemBarrierOptOperand : AsmOperandClass {
- let Name = "MemBarrierOpt";
- let SuperClasses = [];
- let ParserMethod = "tryParseMemBarrierOptOperand";
-}
-
-def ProcIFlagsOperand : AsmOperandClass {
- let Name = "ProcIFlags";
- let SuperClasses = [];
- let ParserMethod = "tryParseProcIFlagsOperand";
-}
-
-def MSRMaskOperand : AsmOperandClass {
- let Name = "MSRMask";
- let SuperClasses = [];
- let ParserMethod = "tryParseMSRMaskOperand";
-}
-
// ARM imod and iflag operands, used only by the CPS instruction.
def imod_op : Operand<i32> {
let PrintMethod = "printCPSIMod";
}
+def ProcIFlagsOperand : AsmOperandClass {
+ let Name = "ProcIFlags";
+ let ParserMethod = "parseProcIFlagsOperand";
+}
def iflags_op : Operand<i32> {
let PrintMethod = "printCPSIFlag";
let ParserMatchClass = ProcIFlagsOperand;
@@ -170,17 +147,21 @@ def iflags_op : Operand<i32> {
// ARM Predicate operand. Default to 14 = always (AL). Second part is CC
// register whose default is 0 (no register).
-def pred : PredicateOperand<OtherVT, (ops i32imm, CCR),
+def CondCodeOperand : AsmOperandClass { let Name = "CondCode"; }
+def pred : PredicateOperand<OtherVT, (ops i32imm, i32imm),
(ops (i32 14), (i32 zero_reg))> {
let PrintMethod = "printPredicateOperand";
let ParserMatchClass = CondCodeOperand;
+ let DecoderMethod = "DecodePredicateOperand";
}
// Conditional code result for instructions whose 's' bit is set, e.g. subs.
+def CCOutOperand : AsmOperandClass { let Name = "CCOut"; }
def cc_out : OptionalDefOperand<OtherVT, (ops CCR), (ops (i32 zero_reg))> {
let EncoderMethod = "getCCOutOpValue";
let PrintMethod = "printSBitModifierOperand";
let ParserMatchClass = CCOutOperand;
+ let DecoderMethod = "DecodeCCOutOperand";
}
// Same as cc_out except it defaults to setting CPSR.
@@ -188,16 +169,27 @@ def s_cc_out : OptionalDefOperand<OtherVT, (ops CCR), (ops (i32 CPSR))> {
let EncoderMethod = "getCCOutOpValue";
let PrintMethod = "printSBitModifierOperand";
let ParserMatchClass = CCOutOperand;
+ let DecoderMethod = "DecodeCCOutOperand";
}
// ARM special operands for disassembly only.
//
+def SetEndAsmOperand : AsmOperandClass {
+ let Name = "SetEndImm";
+ let ParserMethod = "parseSetEndImm";
+}
def setend_op : Operand<i32> {
let PrintMethod = "printSetendOperand";
+ let ParserMatchClass = SetEndAsmOperand;
}
+def MSRMaskOperand : AsmOperandClass {
+ let Name = "MSRMask";
+ let ParserMethod = "parseMSRMaskOperand";
+}
def msr_mask : Operand<i32> {
let PrintMethod = "printMSRMaskOperand";
+ let DecoderMethod = "DecodeMSRMask";
let ParserMatchClass = MSRMaskOperand;
}
@@ -211,21 +203,40 @@ def msr_mask : Operand<i32> {
// 64 64 - <imm> is encoded in imm6<5:0>
def shr_imm8 : Operand<i32> {
let EncoderMethod = "getShiftRight8Imm";
+ let DecoderMethod = "DecodeShiftRight8Imm";
}
def shr_imm16 : Operand<i32> {
let EncoderMethod = "getShiftRight16Imm";
+ let DecoderMethod = "DecodeShiftRight16Imm";
}
def shr_imm32 : Operand<i32> {
let EncoderMethod = "getShiftRight32Imm";
+ let DecoderMethod = "DecodeShiftRight32Imm";
}
def shr_imm64 : Operand<i32> {
let EncoderMethod = "getShiftRight64Imm";
+ let DecoderMethod = "DecodeShiftRight64Imm";
}
//===----------------------------------------------------------------------===//
+// ARM Assembler alias templates.
+//
+class ARMInstAlias<string Asm, dag Result, bit Emit = 0b1>
+ : InstAlias<Asm, Result, Emit>, Requires<[IsARM]>;
+class tInstAlias<string Asm, dag Result, bit Emit = 0b1>
+ : InstAlias<Asm, Result, Emit>, Requires<[IsThumb]>;
+class t2InstAlias<string Asm, dag Result, bit Emit = 0b1>
+ : InstAlias<Asm, Result, Emit>, Requires<[IsThumb2]>;
+class VFP2InstAlias<string Asm, dag Result, bit Emit = 0b1>
+ : InstAlias<Asm, Result, Emit>, Requires<[HasVFP2]>;
+class VFP3InstAlias<string Asm, dag Result, bit Emit = 0b1>
+ : InstAlias<Asm, Result, Emit>, Requires<[HasVFP3]>;
+
+//===----------------------------------------------------------------------===//
// ARM Instruction templates.
//
+
class InstTemplate<AddrMode am, int sz, IndexMode im,
Format f, Domain d, string cstr, InstrItinClass itin>
: Instruction {
@@ -240,17 +251,22 @@ class InstTemplate<AddrMode am, int sz, IndexMode im,
Domain D = d;
bit isUnaryDataProc = 0;
bit canXformTo16Bit = 0;
+ // The instruction is a 16-bit flag setting Thumb instruction. Used
+ // by the parser to determine whether to require the 'S' suffix on the
+ // mnemonic (when not in an IT block) or preclude it (when in an IT block).
+ bit thumbArithFlagSetting = 0;
// If this is a pseudo instruction, mark it isCodeGenOnly.
let isCodeGenOnly = !eq(!cast<string>(f), "Pseudo");
- // The layout of TSFlags should be kept in sync with ARMBaseInstrInfo.h.
+ // The layout of TSFlags should be kept in sync with ARMBaseInfo.h.
let TSFlags{4-0} = AM.Value;
let TSFlags{6-5} = IndexModeBits;
let TSFlags{12-7} = Form;
let TSFlags{13} = isUnaryDataProc;
let TSFlags{14} = canXformTo16Bit;
let TSFlags{17-15} = D.Value;
+ let TSFlags{18} = thumbArithFlagSetting;
let Constraints = cstr;
let Itinerary = itin;
@@ -262,13 +278,17 @@ class Encoding {
class InstARM<AddrMode am, int sz, IndexMode im,
Format f, Domain d, string cstr, InstrItinClass itin>
- : InstTemplate<am, sz, im, f, d, cstr, itin>, Encoding;
+ : InstTemplate<am, sz, im, f, d, cstr, itin>, Encoding {
+ let DecoderNamespace = "ARM";
+}
// This Encoding-less class is used by Thumb1 to specify the encoding bits later
// on by adding flavors to specific instructions.
class InstThumb<AddrMode am, int sz, IndexMode im,
Format f, Domain d, string cstr, InstrItinClass itin>
- : InstTemplate<am, sz, im, f, d, cstr, itin>;
+ : InstTemplate<am, sz, im, f, d, cstr, itin> {
+ let DecoderNamespace = "Thumb";
+}
class PseudoInst<dag oops, dag iops, InstrItinClass itin, list<dag> pattern>
: InstTemplate<AddrModeNone, 0, IndexModeNone, Pseudo,
@@ -426,11 +446,11 @@ class AIldrex<bits<2> opcod, dag oops, dag iops, InstrItinClass itin,
: I<oops, iops, AddrModeNone, 4, IndexModeNone, LdStExFrm, itin,
opc, asm, "", pattern> {
bits<4> Rt;
- bits<4> Rn;
+ bits<4> addr;
let Inst{27-23} = 0b00011;
let Inst{22-21} = opcod;
let Inst{20} = 1;
- let Inst{19-16} = Rn;
+ let Inst{19-16} = addr;
let Inst{15-12} = Rt;
let Inst{11-0} = 0b111110011111;
}
@@ -450,14 +470,14 @@ class AIstrex<bits<2> opcod, dag oops, dag iops, InstrItinClass itin,
let Inst{3-0} = Rt;
}
class AIswp<bit b, dag oops, dag iops, string opc, list<dag> pattern>
- : AI<oops, iops, MiscFrm, NoItinerary, opc, "\t$Rt, $Rt2, [$Rn]", pattern> {
+ : AI<oops, iops, MiscFrm, NoItinerary, opc, "\t$Rt, $Rt2, $addr", pattern> {
bits<4> Rt;
bits<4> Rt2;
- bits<4> Rn;
+ bits<4> addr;
let Inst{27-23} = 0b00010;
let Inst{22} = b;
let Inst{21-20} = 0b00;
- let Inst{19-16} = Rn;
+ let Inst{19-16} = addr;
let Inst{15-12} = Rt;
let Inst{11-4} = 0b00001001;
let Inst{3-0} = Rt2;
@@ -515,22 +535,41 @@ class AI2ldstidx<bit isLd, bit isByte, bit isPre, dag oops, dag iops,
let Inst{20} = isLd; // L bit
let Inst{15-12} = Rt;
}
-class AI2stridx<bit isByte, bit isPre, dag oops, dag iops,
+class AI2stridx_reg<bit isByte, bit isPre, dag oops, dag iops,
+ IndexMode im, Format f, InstrItinClass itin, string opc,
+ string asm, string cstr, list<dag> pattern>
+ : AI2ldstidx<0, isByte, isPre, oops, iops, im, f, itin, opc, asm, cstr,
+ pattern> {
+ // AM2 store w/ two operands: (GPR, am2offset)
+ // {12} isAdd
+ // {11-0} imm12/Rm
+ bits<14> offset;
+ bits<4> Rn;
+ let Inst{25} = 1;
+ let Inst{23} = offset{12};
+ let Inst{19-16} = Rn;
+ let Inst{11-5} = offset{11-5};
+ let Inst{4} = 0;
+ let Inst{3-0} = offset{3-0};
+}
+
+class AI2stridx_imm<bit isByte, bit isPre, dag oops, dag iops,
IndexMode im, Format f, InstrItinClass itin, string opc,
string asm, string cstr, list<dag> pattern>
: AI2ldstidx<0, isByte, isPre, oops, iops, im, f, itin, opc, asm, cstr,
pattern> {
// AM2 store w/ two operands: (GPR, am2offset)
- // {13} 1 == Rm, 0 == imm12
// {12} isAdd
// {11-0} imm12/Rm
bits<14> offset;
bits<4> Rn;
- let Inst{25} = offset{13};
+ let Inst{25} = 0;
let Inst{23} = offset{12};
let Inst{19-16} = Rn;
let Inst{11-0} = offset{11-0};
}
+
+
// FIXME: Merge with the above class when addrmode2 gets used for STR, STRB
// but for now use this class for STRT and STRBT.
class AI2stridxT<bit isByte, bit isPre, dag oops, dag iops,
@@ -568,9 +607,11 @@ class AI3ld<bits<4> op, bit op20, dag oops, dag iops, Format f,
let Inst{11-8} = addr{7-4}; // imm7_4/zero
let Inst{7-4} = op;
let Inst{3-0} = addr{3-0}; // imm3_0/Rm
+
+ let DecoderMethod = "DecodeAddrMode3Instruction";
}
-class AI3ldstidx<bits<4> op, bit op20, bit isLd, bit isPre, dag oops, dag iops,
+class AI3ldstidx<bits<4> op, bit op20, bit isPre, dag oops, dag iops,
IndexMode im, Format f, InstrItinClass itin, string opc,
string asm, string cstr, list<dag> pattern>
: I<oops, iops, AddrMode3, 4, im, f, itin,
@@ -586,48 +627,24 @@ class AI3ldstidx<bits<4> op, bit op20, bit isLd, bit isPre, dag oops, dag iops,
// FIXME: Merge with the above class when addrmode2 gets used for LDR, LDRB
// but for now use this class for LDRSBT, LDRHT, LDSHT.
-class AI3ldstidxT<bits<4> op, bit op20, bit isLd, bit isPre, dag oops, dag iops,
+class AI3ldstidxT<bits<4> op, bit isLoad, dag oops, dag iops,
IndexMode im, Format f, InstrItinClass itin, string opc,
string asm, string cstr, list<dag> pattern>
- : I<oops, iops, AddrMode3, 4, im, f, itin,
- opc, asm, cstr, pattern> {
+ : I<oops, iops, AddrMode3, 4, im, f, itin, opc, asm, cstr, pattern> {
// {13} 1 == imm8, 0 == Rm
// {12-9} Rn
// {8} isAdd
// {7-4} imm7_4/zero
// {3-0} imm3_0/Rm
- bits<14> addr;
- bits<4> Rt;
- let Inst{27-25} = 0b000;
- let Inst{24} = isPre; // P bit
- let Inst{23} = addr{8}; // U bit
- let Inst{22} = addr{13}; // 1 == imm8, 0 == Rm
- let Inst{20} = op20; // L bit
- let Inst{19-16} = addr{12-9}; // Rn
- let Inst{15-12} = Rt; // Rt
- let Inst{11-8} = addr{7-4}; // imm7_4/zero
- let Inst{7-4} = op;
- let Inst{3-0} = addr{3-0}; // imm3_0/Rm
- let AsmMatchConverter = "CvtLdWriteBackRegAddrMode3";
-}
-
-class AI3stridx<bits<4> op, bit isByte, bit isPre, dag oops, dag iops,
- IndexMode im, Format f, InstrItinClass itin, string opc,
- string asm, string cstr, list<dag> pattern>
- : AI2ldstidx<0, isByte, isPre, oops, iops, im, f, itin, opc, asm, cstr,
- pattern> {
- // AM3 store w/ two operands: (GPR, am3offset)
- bits<14> offset;
+ bits<4> addr;
bits<4> Rt;
- bits<4> Rn;
let Inst{27-25} = 0b000;
- let Inst{23} = offset{8};
- let Inst{22} = offset{9};
- let Inst{19-16} = Rn;
+ let Inst{24} = 0; // P bit
+ let Inst{21} = 1;
+ let Inst{20} = isLoad; // L bit
+ let Inst{19-16} = addr; // Rn
let Inst{15-12} = Rt; // Rt
- let Inst{11-8} = offset{7-4}; // imm7_4/zero
let Inst{7-4} = op;
- let Inst{3-0} = offset{3-0}; // imm3_0/Rm
}
// stores
@@ -648,75 +665,7 @@ class AI3str<bits<4> op, dag oops, dag iops, Format f, InstrItinClass itin,
let Inst{11-8} = addr{7-4}; // imm7_4/zero
let Inst{7-4} = op;
let Inst{3-0} = addr{3-0}; // imm3_0/Rm
-}
-
-// Pre-indexed stores
-class AI3sthpr<dag oops, dag iops, Format f, InstrItinClass itin,
- string opc, string asm, string cstr, list<dag> pattern>
- : I<oops, iops, AddrMode3, 4, IndexModePre, f, itin,
- opc, asm, cstr, pattern> {
- let Inst{4} = 1;
- let Inst{5} = 1; // H bit
- let Inst{6} = 0; // S bit
- let Inst{7} = 1;
- let Inst{20} = 0; // L bit
- let Inst{21} = 1; // W bit
- let Inst{24} = 1; // P bit
- let Inst{27-25} = 0b000;
-}
-class AI3stdpr<dag oops, dag iops, Format f, InstrItinClass itin,
- string opc, string asm, string cstr, list<dag> pattern>
- : I<oops, iops, AddrMode3, 4, IndexModePre, f, itin,
- opc, asm, cstr, pattern> {
- let Inst{4} = 1;
- let Inst{5} = 1; // H bit
- let Inst{6} = 1; // S bit
- let Inst{7} = 1;
- let Inst{20} = 0; // L bit
- let Inst{21} = 1; // W bit
- let Inst{24} = 1; // P bit
- let Inst{27-25} = 0b000;
-}
-
-// Post-indexed stores
-class AI3sthpo<dag oops, dag iops, Format f, InstrItinClass itin,
- string opc, string asm, string cstr, list<dag> pattern>
- : I<oops, iops, AddrMode3, 4, IndexModePost, f, itin,
- opc, asm, cstr,pattern> {
- // {13} 1 == imm8, 0 == Rm
- // {12-9} Rn
- // {8} isAdd
- // {7-4} imm7_4/zero
- // {3-0} imm3_0/Rm
- bits<14> addr;
- bits<4> Rt;
- let Inst{3-0} = addr{3-0}; // imm3_0/Rm
- let Inst{4} = 1;
- let Inst{5} = 1; // H bit
- let Inst{6} = 0; // S bit
- let Inst{7} = 1;
- let Inst{11-8} = addr{7-4}; // imm7_4/zero
- let Inst{15-12} = Rt; // Rt
- let Inst{19-16} = addr{12-9}; // Rn
- let Inst{20} = 0; // L bit
- let Inst{21} = 0; // W bit
- let Inst{22} = addr{13}; // 1 == imm8, 0 == Rm
- let Inst{23} = addr{8}; // U bit
- let Inst{24} = 0; // P bit
- let Inst{27-25} = 0b000;
-}
-class AI3stdpo<dag oops, dag iops, Format f, InstrItinClass itin,
- string opc, string asm, string cstr, list<dag> pattern>
- : I<oops, iops, AddrMode3, 4, IndexModePost, f, itin,
- opc, asm, cstr, pattern> {
- let Inst{4} = 1;
- let Inst{5} = 1; // H bit
- let Inst{6} = 1; // S bit
- let Inst{7} = 1;
- let Inst{20} = 0; // L bit
- let Inst{21} = 0; // W bit
- let Inst{24} = 0; // P bit
- let Inst{27-25} = 0b000;
+ let DecoderMethod = "DecodeAddrMode3Instruction";
}
// addrmode4 instructions
@@ -843,6 +792,23 @@ class AMiscA1I<bits<8> opcod, bits<4> opc7_4, dag oops, dag iops,
}
// PKH instructions
+def PKHLSLAsmOperand : AsmOperandClass {
+ let Name = "PKHLSLImm";
+ let ParserMethod = "parsePKHLSLImm";
+}
+def pkh_lsl_amt: Operand<i32>, ImmLeaf<i32, [{ return Imm >= 0 && Imm < 32; }]>{
+ let PrintMethod = "printPKHLSLShiftImm";
+ let ParserMatchClass = PKHLSLAsmOperand;
+}
+def PKHASRAsmOperand : AsmOperandClass {
+ let Name = "PKHASRImm";
+ let ParserMethod = "parsePKHASRImm";
+}
+def pkh_asr_amt: Operand<i32>, ImmLeaf<i32, [{ return Imm > 0 && Imm <= 32; }]>{
+ let PrintMethod = "printPKHASRShiftImm";
+ let ParserMatchClass = PKHASRAsmOperand;
+}
+
class APKHI<bits<8> opcod, bit tb, dag oops, dag iops, InstrItinClass itin,
string opc, string asm, list<dag> pattern>
: I<oops, iops, AddrModeNone, 4, IndexModeNone, ArithMiscFrm, itin,
@@ -850,11 +816,11 @@ class APKHI<bits<8> opcod, bit tb, dag oops, dag iops, InstrItinClass itin,
bits<4> Rd;
bits<4> Rn;
bits<4> Rm;
- bits<8> sh;
+ bits<5> sh;
let Inst{27-20} = opcod;
let Inst{19-16} = Rn;
let Inst{15-12} = Rd;
- let Inst{11-7} = sh{7-3};
+ let Inst{11-7} = sh;
let Inst{6} = tb;
let Inst{5-4} = 0b01;
let Inst{3-0} = Rm;
@@ -949,7 +915,9 @@ class Thumb1sI<dag oops, dag iops, AddrMode am, int sz,
let InOperandList = !con(iops, (ins pred:$p));
let AsmString = !strconcat(opc, "${s}${p}", asm);
let Pattern = pattern;
+ let thumbArithFlagSetting = 1;
list<Predicate> Predicates = [IsThumb, IsThumb1Only];
+ let DecoderNamespace = "ThumbSBit";
}
class T1sI<dag oops, dag iops, InstrItinClass itin,
@@ -1071,6 +1039,7 @@ class Thumb2I<dag oops, dag iops, AddrMode am, int sz,
let AsmString = !strconcat(opc, "${p}", asm);
let Pattern = pattern;
list<Predicate> Predicates = [IsThumb2];
+ let DecoderNamespace = "Thumb2";
}
// Same as Thumb2I except it can optionally modify CPSR. Note it's modeled as an
@@ -1091,6 +1060,7 @@ class Thumb2sI<dag oops, dag iops, AddrMode am, int sz,
let AsmString = !strconcat(opc, "${s}${p}", asm);
let Pattern = pattern;
list<Predicate> Predicates = [IsThumb2];
+ let DecoderNamespace = "Thumb2";
}
// Special cases
@@ -1103,6 +1073,7 @@ class Thumb2XI<dag oops, dag iops, AddrMode am, int sz,
let AsmString = asm;
let Pattern = pattern;
list<Predicate> Predicates = [IsThumb2];
+ let DecoderNamespace = "Thumb2";
}
class ThumbXI<dag oops, dag iops, AddrMode am, int sz,
@@ -1114,6 +1085,7 @@ class ThumbXI<dag oops, dag iops, AddrMode am, int sz,
let AsmString = asm;
let Pattern = pattern;
list<Predicate> Predicates = [IsThumb, IsThumb1Only];
+ let DecoderNamespace = "Thumb";
}
class T2I<dag oops, dag iops, InstrItinClass itin,
@@ -1132,8 +1104,8 @@ class T2Ipc<dag oops, dag iops, InstrItinClass itin,
string opc, string asm, list<dag> pattern>
: Thumb2I<oops, iops, AddrModeT2_pc, 4, itin, opc, asm, "", pattern>;
class T2Ii8s4<bit P, bit W, bit isLoad, dag oops, dag iops, InstrItinClass itin,
- string opc, string asm, list<dag> pattern>
- : Thumb2I<oops, iops, AddrModeT2_i8s4, 4, itin, opc, asm, "",
+ string opc, string asm, string cstr, list<dag> pattern>
+ : Thumb2I<oops, iops, AddrModeT2_i8s4, 4, itin, opc, asm, cstr,
pattern> {
bits<4> Rt;
bits<4> Rt2;
@@ -1149,6 +1121,26 @@ class T2Ii8s4<bit P, bit W, bit isLoad, dag oops, dag iops, InstrItinClass itin,
let Inst{11-8} = Rt2{3-0};
let Inst{7-0} = addr{7-0};
}
+class T2Ii8s4post<bit P, bit W, bit isLoad, dag oops, dag iops,
+ InstrItinClass itin, string opc, string asm, string cstr,
+ list<dag> pattern>
+ : Thumb2I<oops, iops, AddrModeT2_i8s4, 4, itin, opc, asm, cstr,
+ pattern> {
+ bits<4> Rt;
+ bits<4> Rt2;
+ bits<4> addr;
+ bits<9> imm;
+ let Inst{31-25} = 0b1110100;
+ let Inst{24} = P;
+ let Inst{23} = imm{8};
+ let Inst{22} = 1;
+ let Inst{21} = W;
+ let Inst{20} = isLoad;
+ let Inst{19-16} = addr;
+ let Inst{15-12} = Rt{3-0};
+ let Inst{11-8} = Rt2{3-0};
+ let Inst{7-0} = imm{7-0};
+}
class T2sI<dag oops, dag iops, InstrItinClass itin,
string opc, string asm, list<dag> pattern>
@@ -1172,8 +1164,8 @@ class T2XIt<dag oops, dag iops, InstrItinClass itin,
string asm, string cstr, list<dag> pattern>
: Thumb2XI<oops, iops, AddrModeNone, 4, itin, asm, cstr, pattern>;
-// T2Iidxldst - Thumb2 indexed load / store instructions.
-class T2Iidxldst<bit signed, bits<2> opcod, bit load, bit pre,
+// T2Ipreldst - Thumb2 pre-indexed load / store instructions.
+class T2Ipreldst<bit signed, bits<2> opcod, bit load, bit pre,
dag oops, dag iops,
AddrMode am, IndexMode im, InstrItinClass itin,
string opc, string asm, string cstr, list<dag> pattern>
@@ -1183,25 +1175,60 @@ class T2Iidxldst<bit signed, bits<2> opcod, bit load, bit pre,
let AsmString = !strconcat(opc, "${p}", asm);
let Pattern = pattern;
list<Predicate> Predicates = [IsThumb2];
+ let DecoderNamespace = "Thumb2";
+
+ bits<4> Rt;
+ bits<13> addr;
let Inst{31-27} = 0b11111;
let Inst{26-25} = 0b00;
let Inst{24} = signed;
let Inst{23} = 0;
let Inst{22-21} = opcod;
let Inst{20} = load;
+ let Inst{19-16} = addr{12-9};
+ let Inst{15-12} = Rt{3-0};
let Inst{11} = 1;
// (P, W) = (1, 1) Pre-indexed or (0, 1) Post-indexed
let Inst{10} = pre; // The P bit.
+ let Inst{9} = addr{8}; // Sign bit
let Inst{8} = 1; // The W bit.
+ let Inst{7-0} = addr{7-0};
- bits<9> addr;
- let Inst{7-0} = addr{7-0};
- let Inst{9} = addr{8}; // Sign bit
+ let DecoderMethod = "DecodeT2LdStPre";
+}
+
+// T2Ipostldst - Thumb2 post-indexed load / store instructions.
+class T2Ipostldst<bit signed, bits<2> opcod, bit load, bit pre,
+ dag oops, dag iops,
+ AddrMode am, IndexMode im, InstrItinClass itin,
+ string opc, string asm, string cstr, list<dag> pattern>
+ : InstARM<am, 4, im, ThumbFrm, GenericDomain, cstr, itin> {
+ let OutOperandList = oops;
+ let InOperandList = !con(iops, (ins pred:$p));
+ let AsmString = !strconcat(opc, "${p}", asm);
+ let Pattern = pattern;
+ list<Predicate> Predicates = [IsThumb2];
+ let DecoderNamespace = "Thumb2";
bits<4> Rt;
bits<4> Rn;
+ bits<9> offset;
+ let Inst{31-27} = 0b11111;
+ let Inst{26-25} = 0b00;
+ let Inst{24} = signed;
+ let Inst{23} = 0;
+ let Inst{22-21} = opcod;
+ let Inst{20} = load;
+ let Inst{19-16} = Rn;
let Inst{15-12} = Rt{3-0};
- let Inst{19-16} = Rn{3-0};
+ let Inst{11} = 1;
+ // (P, W) = (1, 1) Pre-indexed or (0, 1) Post-indexed
+ let Inst{10} = pre; // The P bit.
+ let Inst{9} = offset{8}; // Sign bit
+ let Inst{8} = 1; // The W bit.
+ let Inst{7-0} = offset{7-0};
+
+ let DecoderMethod = "DecodeT2LdStPre";
}
// Tv5Pat - Same as Pat<>, but requires V5T Thumb mode.
@@ -1242,6 +1269,7 @@ class VFPI<dag oops, dag iops, AddrMode am, int sz,
let AsmString = !strconcat(opc, "${p}", asm);
let Pattern = pattern;
let PostEncoderMethod = "VFPThumb2PostEncoder";
+ let DecoderNamespace = "VFP";
list<Predicate> Predicates = [HasVFP2];
}
@@ -1257,6 +1285,7 @@ class VFPXI<dag oops, dag iops, AddrMode am, int sz,
let AsmString = asm;
let Pattern = pattern;
let PostEncoderMethod = "VFPThumb2PostEncoder";
+ let DecoderNamespace = "VFP";
list<Predicate> Predicates = [HasVFP2];
}
@@ -1574,6 +1603,7 @@ class NeonI<dag oops, dag iops, AddrMode am, IndexMode im, Format f,
let AsmString = !strconcat(opc, "${p}", ".", dt, "\t", asm);
let Pattern = pattern;
list<Predicate> Predicates = [HasNEON];
+ let DecoderNamespace = "NEON";
}
// Same as NeonI except it does not have a "data type" specifier.
@@ -1586,6 +1616,7 @@ class NeonXI<dag oops, dag iops, AddrMode am, IndexMode im, Format f,
let AsmString = !strconcat(opc, "${p}", "\t", asm);
let Pattern = pattern;
list<Predicate> Predicates = [HasNEON];
+ let DecoderNamespace = "NEON";
}
class NLdSt<bit op23, bits<2> op21_20, bits<4> op11_8, bits<4> op7_4,
@@ -1600,6 +1631,7 @@ class NLdSt<bit op23, bits<2> op21_20, bits<4> op11_8, bits<4> op7_4,
let Inst{7-4} = op7_4;
let PostEncoderMethod = "NEONThumb2LoadStorePostEncoder";
+ let DecoderNamespace = "NEONLoadStore";
bits<5> Vd;
bits<6> Rn;
@@ -1643,6 +1675,7 @@ class NDataI<dag oops, dag iops, Format f, InstrItinClass itin,
pattern> {
let Inst{31-25} = 0b1111001;
let PostEncoderMethod = "NEONThumb2DataIPostEncoder";
+ let DecoderNamespace = "NEONData";
}
class NDataXI<dag oops, dag iops, Format f, InstrItinClass itin,
@@ -1651,6 +1684,7 @@ class NDataXI<dag oops, dag iops, Format f, InstrItinClass itin,
cstr, pattern> {
let Inst{31-25} = 0b1111001;
let PostEncoderMethod = "NEONThumb2DataIPostEncoder";
+ let DecoderNamespace = "NEONData";
}
// NEON "one register and a modified immediate" format.
@@ -1677,6 +1711,7 @@ class N1ModImm<bit op23, bits<3> op21_19, bits<4> op11_8, bit op7, bit op6,
let Inst{24} = SIMM{7};
let Inst{18-16} = SIMM{6-4};
let Inst{3-0} = SIMM{3-0};
+ let DecoderMethod = "DecodeNEONModImmInstruction";
}
// NEON 2 vector register format.
@@ -1874,6 +1909,7 @@ class NVLaneOp<bits<8> opcod1, bits<4> opcod2, bits<2> opcod3,
list<Predicate> Predicates = [HasNEON];
let PostEncoderMethod = "NEONThumb2DupPostEncoder";
+ let DecoderNamespace = "NEONDup";
bits<5> V;
bits<4> R;
@@ -1915,7 +1951,6 @@ class NVDupLane<bits<4> op19_16, bit op6, dag oops, dag iops,
bits<5> Vd;
bits<5> Vm;
- bits<4> lane;
let Inst{22} = Vd{4};
let Inst{15-12} = Vd{3-0};
diff --git a/contrib/llvm/lib/Target/ARM/ARMInstrInfo.cpp b/contrib/llvm/lib/Target/ARM/ARMInstrInfo.cpp
index adcbf18..48da03f 100644
--- a/contrib/llvm/lib/Target/ARM/ARMInstrInfo.cpp
+++ b/contrib/llvm/lib/Target/ARM/ARMInstrInfo.cpp
@@ -13,8 +13,8 @@
#include "ARMInstrInfo.h"
#include "ARM.h"
-#include "ARMAddressingModes.h"
#include "ARMMachineFunctionInfo.h"
+#include "MCTargetDesc/ARMAddressingModes.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/CodeGen/LiveVariables.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
@@ -30,14 +30,18 @@ ARMInstrInfo::ARMInstrInfo(const ARMSubtarget &STI)
unsigned ARMInstrInfo::getUnindexedOpcode(unsigned Opc) const {
switch (Opc) {
default: break;
- case ARM::LDR_PRE:
- case ARM::LDR_POST:
+ case ARM::LDR_PRE_IMM:
+ case ARM::LDR_PRE_REG:
+ case ARM::LDR_POST_IMM:
+ case ARM::LDR_POST_REG:
return ARM::LDRi12;
case ARM::LDRH_PRE:
case ARM::LDRH_POST:
return ARM::LDRH;
- case ARM::LDRB_PRE:
- case ARM::LDRB_POST:
+ case ARM::LDRB_PRE_IMM:
+ case ARM::LDRB_PRE_REG:
+ case ARM::LDRB_POST_IMM:
+ case ARM::LDRB_POST_REG:
return ARM::LDRBi12;
case ARM::LDRSH_PRE:
case ARM::LDRSH_POST:
@@ -45,14 +49,18 @@ unsigned ARMInstrInfo::getUnindexedOpcode(unsigned Opc) const {
case ARM::LDRSB_PRE:
case ARM::LDRSB_POST:
return ARM::LDRSB;
- case ARM::STR_PRE:
- case ARM::STR_POST:
+ case ARM::STR_PRE_IMM:
+ case ARM::STR_PRE_REG:
+ case ARM::STR_POST_IMM:
+ case ARM::STR_POST_REG:
return ARM::STRi12;
case ARM::STRH_PRE:
case ARM::STRH_POST:
return ARM::STRH;
- case ARM::STRB_PRE:
- case ARM::STRB_POST:
+ case ARM::STRB_PRE_IMM:
+ case ARM::STRB_PRE_REG:
+ case ARM::STRB_POST_IMM:
+ case ARM::STRB_POST_REG:
return ARM::STRBi12;
}
diff --git a/contrib/llvm/lib/Target/ARM/ARMInstrInfo.td b/contrib/llvm/lib/Target/ARM/ARMInstrInfo.td
index a42dd1a..2cf0f09 100644
--- a/contrib/llvm/lib/Target/ARM/ARMInstrInfo.td
+++ b/contrib/llvm/lib/Target/ARM/ARMInstrInfo.td
@@ -70,6 +70,18 @@ def SDT_ARMTCRET : SDTypeProfile<0, 1, [SDTCisPtrTy<0>]>;
def SDT_ARMBFI : SDTypeProfile<1, 3, [SDTCisVT<0, i32>, SDTCisVT<1, i32>,
SDTCisVT<2, i32>, SDTCisVT<3, i32>]>;
+def SDTBinaryArithWithFlags : SDTypeProfile<2, 2,
+ [SDTCisSameAs<0, 2>,
+ SDTCisSameAs<0, 3>,
+ SDTCisInt<0>, SDTCisVT<1, i32>]>;
+
+// SDTBinaryArithWithFlagsInOut - RES1, CPSR = op LHS, RHS, CPSR
+def SDTBinaryArithWithFlagsInOut : SDTypeProfile<2, 3,
+ [SDTCisSameAs<0, 2>,
+ SDTCisSameAs<0, 3>,
+ SDTCisInt<0>,
+ SDTCisVT<1, i32>,
+ SDTCisVT<4, i32>]>;
// Node definitions.
def ARMWrapper : SDNode<"ARMISD::Wrapper", SDTIntUnaryOp>;
def ARMWrapperDYN : SDNode<"ARMISD::WrapperDYN", SDTIntUnaryOp>;
@@ -120,6 +132,12 @@ def ARMsrl_flag : SDNode<"ARMISD::SRL_FLAG", SDTIntUnaryOp, [SDNPOutGlue]>;
def ARMsra_flag : SDNode<"ARMISD::SRA_FLAG", SDTIntUnaryOp, [SDNPOutGlue]>;
def ARMrrx : SDNode<"ARMISD::RRX" , SDTIntUnaryOp, [SDNPInGlue ]>;
+def ARMaddc : SDNode<"ARMISD::ADDC", SDTBinaryArithWithFlags,
+ [SDNPCommutative]>;
+def ARMsubc : SDNode<"ARMISD::SUBC", SDTBinaryArithWithFlags>;
+def ARMadde : SDNode<"ARMISD::ADDE", SDTBinaryArithWithFlagsInOut>;
+def ARMsube : SDNode<"ARMISD::SUBE", SDTBinaryArithWithFlagsInOut>;
+
def ARMthread_pointer: SDNode<"ARMISD::THREAD_POINTER", SDT_ARMThreadPointer>;
def ARMeh_sjlj_setjmp: SDNode<"ARMISD::EH_SJLJ_SETJMP",
SDT_ARMEH_SJLJ_Setjmp, [SDNPHasChain]>;
@@ -187,10 +205,16 @@ def IsThumb : Predicate<"Subtarget->isThumb()">,
def IsThumb1Only : Predicate<"Subtarget->isThumb1Only()">;
def IsThumb2 : Predicate<"Subtarget->isThumb2()">,
AssemblerPredicate<"ModeThumb,FeatureThumb2">;
+def IsMClass : Predicate<"Subtarget->isMClass()">,
+ AssemblerPredicate<"FeatureMClass">;
+def IsARClass : Predicate<"!Subtarget->isMClass()">,
+ AssemblerPredicate<"!FeatureMClass">;
def IsARM : Predicate<"!Subtarget->isThumb()">,
AssemblerPredicate<"!ModeThumb">;
def IsDarwin : Predicate<"Subtarget->isTargetDarwin()">;
def IsNotDarwin : Predicate<"!Subtarget->isTargetDarwin()">;
+def IsNaCl : Predicate<"Subtarget->isTargetNaCl()">,
+ AssemblerPredicate<"ModeNaCl">;
// FIXME: Eventually this will be just "hasV6T2Ops".
def UseMovt : Predicate<"Subtarget->useMovt()">;
@@ -263,24 +287,11 @@ def imm0_65535 : Operand<i32>, ImmLeaf<i32, [{
let ParserMatchClass = Imm0_65535AsmOperand;
}
+class BinOpWithFlagFrag<dag res> :
+ PatFrag<(ops node:$LHS, node:$RHS, node:$FLAG), res>;
class BinOpFrag<dag res> : PatFrag<(ops node:$LHS, node:$RHS), res>;
class UnOpFrag <dag res> : PatFrag<(ops node:$Src), res>;
-/// adde and sube predicates - True based on whether the carry flag output
-/// will be needed or not.
-def adde_dead_carry :
- PatFrag<(ops node:$LHS, node:$RHS), (adde node:$LHS, node:$RHS),
- [{return !N->hasAnyUseOfValue(1);}]>;
-def sube_dead_carry :
- PatFrag<(ops node:$LHS, node:$RHS), (sube node:$LHS, node:$RHS),
- [{return !N->hasAnyUseOfValue(1);}]>;
-def adde_live_carry :
- PatFrag<(ops node:$LHS, node:$RHS), (adde node:$LHS, node:$RHS),
- [{return N->hasAnyUseOfValue(1);}]>;
-def sube_live_carry :
- PatFrag<(ops node:$LHS, node:$RHS), (sube node:$LHS, node:$RHS),
- [{return N->hasAnyUseOfValue(1);}]>;
-
// An 'and' node with a single use.
def and_su : PatFrag<(ops node:$lhs, node:$rhs), (and node:$lhs, node:$rhs), [{
return N->hasOneUse();
@@ -315,6 +326,7 @@ def fsub_mlx : PatFrag<(ops node:$lhs, node:$rhs),(fsub node:$lhs, node:$rhs),[{
def brtarget : Operand<OtherVT> {
let EncoderMethod = "getBranchTargetOpValue";
let OperandType = "OPERAND_PCREL";
+ let DecoderMethod = "DecodeT2BROperand";
}
// FIXME: get rid of this one?
@@ -345,39 +357,35 @@ def bl_target : Operand<i32> {
let OperandType = "OPERAND_PCREL";
}
-
-// A list of registers separated by comma. Used by load/store multiple.
-def RegListAsmOperand : AsmOperandClass {
- let Name = "RegList";
- let SuperClasses = [];
-}
-
-def DPRRegListAsmOperand : AsmOperandClass {
- let Name = "DPRRegList";
- let SuperClasses = [];
-}
-
-def SPRRegListAsmOperand : AsmOperandClass {
- let Name = "SPRRegList";
- let SuperClasses = [];
+def blx_target : Operand<i32> {
+ // Encoded the same as branch targets.
+ let EncoderMethod = "getARMBLXTargetOpValue";
+ let OperandType = "OPERAND_PCREL";
}
+// A list of registers separated by comma. Used by load/store multiple.
+def RegListAsmOperand : AsmOperandClass { let Name = "RegList"; }
def reglist : Operand<i32> {
let EncoderMethod = "getRegisterListOpValue";
let ParserMatchClass = RegListAsmOperand;
let PrintMethod = "printRegisterList";
+ let DecoderMethod = "DecodeRegListOperand";
}
+def DPRRegListAsmOperand : AsmOperandClass { let Name = "DPRRegList"; }
def dpr_reglist : Operand<i32> {
let EncoderMethod = "getRegisterListOpValue";
let ParserMatchClass = DPRRegListAsmOperand;
let PrintMethod = "printRegisterList";
+ let DecoderMethod = "DecodeDPRRegListOperand";
}
+def SPRRegListAsmOperand : AsmOperandClass { let Name = "SPRRegList"; }
def spr_reglist : Operand<i32> {
let EncoderMethod = "getRegisterListOpValue";
let ParserMatchClass = SPRRegListAsmOperand;
let PrintMethod = "printRegisterList";
+ let DecoderMethod = "DecodeSPRRegListOperand";
}
// An operand for the CONSTPOOL_ENTRY pseudo-instruction.
@@ -397,56 +405,99 @@ def adrlabel : Operand<i32> {
def neon_vcvt_imm32 : Operand<i32> {
let EncoderMethod = "getNEONVcvtImm32OpValue";
+ let DecoderMethod = "DecodeVCVTImmOperand";
}
// rot_imm: An integer that encodes a rotate amount. Must be 8, 16, or 24.
-def rot_imm : Operand<i32>, ImmLeaf<i32, [{
- int32_t v = (int32_t)Imm;
- return v == 8 || v == 16 || v == 24; }]> {
- let EncoderMethod = "getRotImmOpValue";
+def rot_imm_XFORM: SDNodeXForm<imm, [{
+ switch (N->getZExtValue()){
+ default: assert(0);
+ case 0: return CurDAG->getTargetConstant(0, MVT::i32);
+ case 8: return CurDAG->getTargetConstant(1, MVT::i32);
+ case 16: return CurDAG->getTargetConstant(2, MVT::i32);
+ case 24: return CurDAG->getTargetConstant(3, MVT::i32);
+ }
+}]>;
+def RotImmAsmOperand : AsmOperandClass {
+ let Name = "RotImm";
+ let ParserMethod = "parseRotImm";
}
-
-def ShifterAsmOperand : AsmOperandClass {
- let Name = "Shifter";
- let SuperClasses = [];
+def rot_imm : Operand<i32>, PatLeaf<(i32 imm), [{
+ int32_t v = N->getZExtValue();
+ return v == 8 || v == 16 || v == 24; }],
+ rot_imm_XFORM> {
+ let PrintMethod = "printRotImmOperand";
+ let ParserMatchClass = RotImmAsmOperand;
}
// shift_imm: An integer that encodes a shift amount and the type of shift
-// (currently either asr or lsl) using the same encoding used for the
-// immediates in so_reg operands.
+// (asr or lsl). The 6-bit immediate encodes as:
+// {5} 0 ==> lsl
+// 1 asr
+// {4-0} imm5 shift amount.
+// asr #32 encoded as imm5 == 0.
+def ShifterImmAsmOperand : AsmOperandClass {
+ let Name = "ShifterImm";
+ let ParserMethod = "parseShifterImm";
+}
def shift_imm : Operand<i32> {
let PrintMethod = "printShiftImmOperand";
- let ParserMatchClass = ShifterAsmOperand;
+ let ParserMatchClass = ShifterImmAsmOperand;
}
-def ShiftedRegAsmOperand : AsmOperandClass {
- let Name = "ShiftedReg";
+// shifter_operand operands: so_reg_reg, so_reg_imm, and so_imm.
+def ShiftedRegAsmOperand : AsmOperandClass { let Name = "RegShiftedReg"; }
+def so_reg_reg : Operand<i32>, // reg reg imm
+ ComplexPattern<i32, 3, "SelectRegShifterOperand",
+ [shl, srl, sra, rotr]> {
+ let EncoderMethod = "getSORegRegOpValue";
+ let PrintMethod = "printSORegRegOperand";
+ let DecoderMethod = "DecodeSORegRegOperand";
+ let ParserMatchClass = ShiftedRegAsmOperand;
+ let MIOperandInfo = (ops GPRnopc, GPRnopc, i32imm);
}
-// shifter_operand operands: so_reg and so_imm.
-def so_reg : Operand<i32>, // reg reg imm
- ComplexPattern<i32, 3, "SelectShifterOperandReg",
- [shl,srl,sra,rotr]> {
- let EncoderMethod = "getSORegOpValue";
- let PrintMethod = "printSORegOperand";
- let ParserMatchClass = ShiftedRegAsmOperand;
- let MIOperandInfo = (ops GPR, GPR, shift_imm);
+def ShiftedImmAsmOperand : AsmOperandClass { let Name = "RegShiftedImm"; }
+def so_reg_imm : Operand<i32>, // reg imm
+ ComplexPattern<i32, 2, "SelectImmShifterOperand",
+ [shl, srl, sra, rotr]> {
+ let EncoderMethod = "getSORegImmOpValue";
+ let PrintMethod = "printSORegImmOperand";
+ let DecoderMethod = "DecodeSORegImmOperand";
+ let ParserMatchClass = ShiftedImmAsmOperand;
+ let MIOperandInfo = (ops GPR, i32imm);
+}
+
+// FIXME: Does this need to be distinct from so_reg?
+def shift_so_reg_reg : Operand<i32>, // reg reg imm
+ ComplexPattern<i32, 3, "SelectShiftRegShifterOperand",
+ [shl,srl,sra,rotr]> {
+ let EncoderMethod = "getSORegRegOpValue";
+ let PrintMethod = "printSORegRegOperand";
+ let DecoderMethod = "DecodeSORegRegOperand";
+ let MIOperandInfo = (ops GPR, GPR, i32imm);
}
+
// FIXME: Does this need to be distinct from so_reg?
-def shift_so_reg : Operand<i32>, // reg reg imm
- ComplexPattern<i32, 3, "SelectShiftShifterOperandReg",
+def shift_so_reg_imm : Operand<i32>, // reg reg imm
+ ComplexPattern<i32, 2, "SelectShiftImmShifterOperand",
[shl,srl,sra,rotr]> {
- let EncoderMethod = "getSORegOpValue";
- let PrintMethod = "printSORegOperand";
- let MIOperandInfo = (ops GPR, GPR, shift_imm);
+ let EncoderMethod = "getSORegImmOpValue";
+ let PrintMethod = "printSORegImmOperand";
+ let DecoderMethod = "DecodeSORegImmOperand";
+ let MIOperandInfo = (ops GPR, i32imm);
}
+
// so_imm - Match a 32-bit shifter_operand immediate operand, which is an
// 8-bit immediate rotated by an arbitrary number of bits.
+def SOImmAsmOperand: AsmOperandClass { let Name = "ARMSOImm"; }
def so_imm : Operand<i32>, ImmLeaf<i32, [{
return ARM_AM::getSOImmVal(Imm) != -1;
}]> {
let EncoderMethod = "getSOImmOpValue";
+ let ParserMatchClass = SOImmAsmOperand;
+ let DecoderMethod = "DecodeSOImmOperand";
}
// Break so_imm's up into two pieces. This handles immediates with up to 16
@@ -464,7 +515,7 @@ def arm_i32imm : PatLeaf<(imm), [{
return ARM_AM::isSOImmTwoPartVal((unsigned)N->getZExtValue());
}]>;
-/// imm0_7 predicate - Immediate in the range [0,31].
+/// imm0_7 predicate - Immediate in the range [0,7].
def Imm0_7AsmOperand: AsmOperandClass { let Name = "Imm0_7"; }
def imm0_7 : Operand<i32>, ImmLeaf<i32, [{
return Imm >= 0 && Imm < 8;
@@ -472,7 +523,7 @@ def imm0_7 : Operand<i32>, ImmLeaf<i32, [{
let ParserMatchClass = Imm0_7AsmOperand;
}
-/// imm0_15 predicate - Immediate in the range [0,31].
+/// imm0_15 predicate - Immediate in the range [0,15].
def Imm0_15AsmOperand: AsmOperandClass { let Name = "Imm0_15"; }
def imm0_15 : Operand<i32>, ImmLeaf<i32, [{
return Imm >= 0 && Imm < 16;
@@ -481,68 +532,83 @@ def imm0_15 : Operand<i32>, ImmLeaf<i32, [{
}
/// imm0_31 predicate - True if the 32-bit immediate is in the range [0,31].
+def Imm0_31AsmOperand: AsmOperandClass { let Name = "Imm0_31"; }
def imm0_31 : Operand<i32>, ImmLeaf<i32, [{
return Imm >= 0 && Imm < 32;
-}]>;
-
-/// imm0_31_m1 - Matches and prints like imm0_31, but encodes as 'value - 1'.
-def imm0_31_m1 : Operand<i32>, ImmLeaf<i32, [{
- return Imm >= 0 && Imm < 32;
}]> {
- let EncoderMethod = "getImmMinusOneOpValue";
+ let ParserMatchClass = Imm0_31AsmOperand;
+}
+
+/// imm0_255 predicate - Immediate in the range [0,255].
+def Imm0_255AsmOperand : AsmOperandClass { let Name = "Imm0_255"; }
+def imm0_255 : Operand<i32>, ImmLeaf<i32, [{ return Imm >= 0 && Imm < 256; }]> {
+ let ParserMatchClass = Imm0_255AsmOperand;
}
-// i32imm_hilo16 - For movt/movw - sets the MC Encoder method.
-// The imm is split into imm{15-12}, imm{11-0}
+// imm0_65535_expr - For movt/movw - 16-bit immediate that can also reference
+// a relocatable expression.
//
-def i32imm_hilo16 : Operand<i32> {
+// FIXME: This really needs a Thumb version separate from the ARM version.
+// While the range is the same, and can thus use the same match class,
+// the encoding is different so it should have a different encoder method.
+def Imm0_65535ExprAsmOperand: AsmOperandClass { let Name = "Imm0_65535Expr"; }
+def imm0_65535_expr : Operand<i32> {
let EncoderMethod = "getHiLo16ImmOpValue";
+ let ParserMatchClass = Imm0_65535ExprAsmOperand;
}
+/// imm24b - True if the 32-bit immediate is encodable in 24 bits.
+def Imm24bitAsmOperand: AsmOperandClass { let Name = "Imm24bit"; }
+def imm24b : Operand<i32>, ImmLeaf<i32, [{
+ return Imm >= 0 && Imm <= 0xffffff;
+}]> {
+ let ParserMatchClass = Imm24bitAsmOperand;
+}
+
+
/// bf_inv_mask_imm predicate - An AND mask to clear an arbitrary width bitfield
/// e.g., 0xf000ffff
+def BitfieldAsmOperand : AsmOperandClass {
+ let Name = "Bitfield";
+ let ParserMethod = "parseBitfield";
+}
def bf_inv_mask_imm : Operand<i32>,
PatLeaf<(imm), [{
return ARM::isBitFieldInvertedMask(N->getZExtValue());
}] > {
let EncoderMethod = "getBitfieldInvertedMaskOpValue";
let PrintMethod = "printBitfieldInvMaskImmOperand";
+ let DecoderMethod = "DecodeBitfieldMaskOperand";
+ let ParserMatchClass = BitfieldAsmOperand;
}
-/// lsb_pos_imm - position of the lsb bit, used by BFI4p and t2BFI4p
-def lsb_pos_imm : Operand<i32>, ImmLeaf<i32, [{
- return isInt<5>(Imm);
+def imm1_32_XFORM: SDNodeXForm<imm, [{
+ return CurDAG->getTargetConstant((int)N->getZExtValue() - 1, MVT::i32);
}]>;
-
-/// width_imm - number of bits to be copied, used by BFI4p and t2BFI4p
-def width_imm : Operand<i32>, ImmLeaf<i32, [{
- return Imm > 0 && Imm <= 32;
-}] > {
- let EncoderMethod = "getMsbOpValue";
-}
-
-def ssat_imm : Operand<i32>, ImmLeaf<i32, [{
- return Imm > 0 && Imm <= 32;
-}]> {
- let EncoderMethod = "getSsatBitPosValue";
+def Imm1_32AsmOperand: AsmOperandClass { let Name = "Imm1_32"; }
+def imm1_32 : Operand<i32>, PatLeaf<(imm), [{
+ uint64_t Imm = N->getZExtValue();
+ return Imm > 0 && Imm <= 32;
+ }],
+ imm1_32_XFORM> {
+ let PrintMethod = "printImmPlusOneOperand";
+ let ParserMatchClass = Imm1_32AsmOperand;
+}
+
+def imm1_16_XFORM: SDNodeXForm<imm, [{
+ return CurDAG->getTargetConstant((int)N->getZExtValue() - 1, MVT::i32);
+}]>;
+def Imm1_16AsmOperand: AsmOperandClass { let Name = "Imm1_16"; }
+def imm1_16 : Operand<i32>, PatLeaf<(imm), [{ return Imm > 0 && Imm <= 16; }],
+ imm1_16_XFORM> {
+ let PrintMethod = "printImmPlusOneOperand";
+ let ParserMatchClass = Imm1_16AsmOperand;
}
// Define ARM specific addressing modes.
-
-def MemMode2AsmOperand : AsmOperandClass {
- let Name = "MemMode2";
- let SuperClasses = [];
- let ParserMethod = "tryParseMemMode2Operand";
-}
-
-def MemMode3AsmOperand : AsmOperandClass {
- let Name = "MemMode3";
- let SuperClasses = [];
- let ParserMethod = "tryParseMemMode3Operand";
-}
-
// addrmode_imm12 := reg +/- imm12
//
+def MemImm12OffsetAsmOperand : AsmOperandClass { let Name = "MemImm12Offset"; }
def addrmode_imm12 : Operand<i32>,
ComplexPattern<i32, 2, "SelectAddrModeImm12", []> {
// 12-bit immediate operand. Note that instructions using this encode
@@ -551,53 +617,129 @@ def addrmode_imm12 : Operand<i32>,
let EncoderMethod = "getAddrModeImm12OpValue";
let PrintMethod = "printAddrModeImm12Operand";
+ let DecoderMethod = "DecodeAddrModeImm12Operand";
+ let ParserMatchClass = MemImm12OffsetAsmOperand;
let MIOperandInfo = (ops GPR:$base, i32imm:$offsimm);
}
// ldst_so_reg := reg +/- reg shop imm
//
+def MemRegOffsetAsmOperand : AsmOperandClass { let Name = "MemRegOffset"; }
def ldst_so_reg : Operand<i32>,
ComplexPattern<i32, 3, "SelectLdStSOReg", []> {
let EncoderMethod = "getLdStSORegOpValue";
// FIXME: Simplify the printer
let PrintMethod = "printAddrMode2Operand";
- let MIOperandInfo = (ops GPR:$base, GPR:$offsreg, i32imm:$offsimm);
+ let DecoderMethod = "DecodeSORegMemOperand";
+ let ParserMatchClass = MemRegOffsetAsmOperand;
+ let MIOperandInfo = (ops GPR:$base, GPRnopc:$offsreg, i32imm:$shift);
+}
+
+// postidx_imm8 := +/- [0,255]
+//
+// 9 bit value:
+// {8} 1 is imm8 is non-negative. 0 otherwise.
+// {7-0} [0,255] imm8 value.
+def PostIdxImm8AsmOperand : AsmOperandClass { let Name = "PostIdxImm8"; }
+def postidx_imm8 : Operand<i32> {
+ let PrintMethod = "printPostIdxImm8Operand";
+ let ParserMatchClass = PostIdxImm8AsmOperand;
+ let MIOperandInfo = (ops i32imm);
}
+// postidx_imm8s4 := +/- [0,1020]
+//
+// 9 bit value:
+// {8} 1 is imm8 is non-negative. 0 otherwise.
+// {7-0} [0,255] imm8 value, scaled by 4.
+def PostIdxImm8s4AsmOperand : AsmOperandClass { let Name = "PostIdxImm8s4"; }
+def postidx_imm8s4 : Operand<i32> {
+ let PrintMethod = "printPostIdxImm8s4Operand";
+ let ParserMatchClass = PostIdxImm8s4AsmOperand;
+ let MIOperandInfo = (ops i32imm);
+}
+
+
+// postidx_reg := +/- reg
+//
+def PostIdxRegAsmOperand : AsmOperandClass {
+ let Name = "PostIdxReg";
+ let ParserMethod = "parsePostIdxReg";
+}
+def postidx_reg : Operand<i32> {
+ let EncoderMethod = "getPostIdxRegOpValue";
+ let DecoderMethod = "DecodePostIdxReg";
+ let PrintMethod = "printPostIdxRegOperand";
+ let ParserMatchClass = PostIdxRegAsmOperand;
+ let MIOperandInfo = (ops GPR, i32imm);
+}
+
+
// addrmode2 := reg +/- imm12
// := reg +/- reg shop imm
//
+// FIXME: addrmode2 should be refactored the rest of the way to always
+// use explicit imm vs. reg versions above (addrmode_imm12 and ldst_so_reg).
+def AddrMode2AsmOperand : AsmOperandClass { let Name = "AddrMode2"; }
def addrmode2 : Operand<i32>,
ComplexPattern<i32, 3, "SelectAddrMode2", []> {
let EncoderMethod = "getAddrMode2OpValue";
let PrintMethod = "printAddrMode2Operand";
- let ParserMatchClass = MemMode2AsmOperand;
+ let ParserMatchClass = AddrMode2AsmOperand;
let MIOperandInfo = (ops GPR:$base, GPR:$offsreg, i32imm:$offsimm);
}
-def am2offset : Operand<i32>,
- ComplexPattern<i32, 2, "SelectAddrMode2Offset",
+def PostIdxRegShiftedAsmOperand : AsmOperandClass {
+ let Name = "PostIdxRegShifted";
+ let ParserMethod = "parsePostIdxReg";
+}
+def am2offset_reg : Operand<i32>,
+ ComplexPattern<i32, 2, "SelectAddrMode2OffsetReg",
+ [], [SDNPWantRoot]> {
+ let EncoderMethod = "getAddrMode2OffsetOpValue";
+ let PrintMethod = "printAddrMode2OffsetOperand";
+ // When using this for assembly, it's always as a post-index offset.
+ let ParserMatchClass = PostIdxRegShiftedAsmOperand;
+ let MIOperandInfo = (ops GPR, i32imm);
+}
+
+// FIXME: am2offset_imm should only need the immediate, not the GPR. Having
+// the GPR is purely vestigal at this point.
+def AM2OffsetImmAsmOperand : AsmOperandClass { let Name = "AM2OffsetImm"; }
+def am2offset_imm : Operand<i32>,
+ ComplexPattern<i32, 2, "SelectAddrMode2OffsetImm",
[], [SDNPWantRoot]> {
let EncoderMethod = "getAddrMode2OffsetOpValue";
let PrintMethod = "printAddrMode2OffsetOperand";
+ let ParserMatchClass = AM2OffsetImmAsmOperand;
let MIOperandInfo = (ops GPR, i32imm);
}
+
// addrmode3 := reg +/- reg
// addrmode3 := reg +/- imm8
//
+// FIXME: split into imm vs. reg versions.
+def AddrMode3AsmOperand : AsmOperandClass { let Name = "AddrMode3"; }
def addrmode3 : Operand<i32>,
ComplexPattern<i32, 3, "SelectAddrMode3", []> {
let EncoderMethod = "getAddrMode3OpValue";
let PrintMethod = "printAddrMode3Operand";
- let ParserMatchClass = MemMode3AsmOperand;
+ let ParserMatchClass = AddrMode3AsmOperand;
let MIOperandInfo = (ops GPR:$base, GPR:$offsreg, i32imm:$offsimm);
}
+// FIXME: split into imm vs. reg versions.
+// FIXME: parser method to handle +/- register.
+def AM3OffsetAsmOperand : AsmOperandClass {
+ let Name = "AM3Offset";
+ let ParserMethod = "parseAM3Offset";
+}
def am3offset : Operand<i32>,
ComplexPattern<i32, 2, "SelectAddrMode3Offset",
[], [SDNPWantRoot]> {
let EncoderMethod = "getAddrMode3OffsetOpValue";
let PrintMethod = "printAddrMode3OffsetOperand";
+ let ParserMatchClass = AM3OffsetAsmOperand;
let MIOperandInfo = (ops GPR, i32imm);
}
@@ -608,28 +750,28 @@ def ldstm_mode : OptionalDefOperand<OtherVT, (ops i32), (ops (i32 1))> {
let PrintMethod = "printLdStmModeOperand";
}
-def MemMode5AsmOperand : AsmOperandClass {
- let Name = "MemMode5";
- let SuperClasses = [];
-}
-
// addrmode5 := reg +/- imm8*4
//
+def AddrMode5AsmOperand : AsmOperandClass { let Name = "AddrMode5"; }
def addrmode5 : Operand<i32>,
ComplexPattern<i32, 2, "SelectAddrMode5", []> {
let PrintMethod = "printAddrMode5Operand";
- let MIOperandInfo = (ops GPR:$base, i32imm);
- let ParserMatchClass = MemMode5AsmOperand;
let EncoderMethod = "getAddrMode5OpValue";
+ let DecoderMethod = "DecodeAddrMode5Operand";
+ let ParserMatchClass = AddrMode5AsmOperand;
+ let MIOperandInfo = (ops GPR:$base, i32imm);
}
// addrmode6 := reg with optional alignment
//
+def AddrMode6AsmOperand : AsmOperandClass { let Name = "AlignedMemory"; }
def addrmode6 : Operand<i32>,
ComplexPattern<i32, 2, "SelectAddrMode6", [], [SDNPWantParent]>{
let PrintMethod = "printAddrMode6Operand";
- let MIOperandInfo = (ops GPR:$addr, i32imm);
+ let MIOperandInfo = (ops GPR:$addr, i32imm:$align);
let EncoderMethod = "getAddrMode6AddressOpValue";
+ let DecoderMethod = "DecodeAddrMode6Operand";
+ let ParserMatchClass = AddrMode6AsmOperand;
}
def am6offset : Operand<i32>,
@@ -638,6 +780,7 @@ def am6offset : Operand<i32>,
let PrintMethod = "printAddrMode6OffsetOperand";
let MIOperandInfo = (ops GPR);
let EncoderMethod = "getAddrMode6OffsetOpValue";
+ let DecoderMethod = "DecodeGPRRegisterClass";
}
// Special version of addrmode6 to handle alignment encoding for VST1/VLD1
@@ -666,19 +809,15 @@ def addrmodepc : Operand<i32>,
let MIOperandInfo = (ops GPR, i32imm);
}
-def MemMode7AsmOperand : AsmOperandClass {
- let Name = "MemMode7";
- let SuperClasses = [];
-}
-
-// addrmode7 := reg
-// Used by load/store exclusive instructions. Useful to enable right assembly
-// parsing and printing. Not used for any codegen matching.
+// addr_offset_none := reg
//
-def addrmode7 : Operand<i32> {
+def MemNoOffsetAsmOperand : AsmOperandClass { let Name = "MemNoOffset"; }
+def addr_offset_none : Operand<i32>,
+ ComplexPattern<i32, 1, "SelectAddrOffsetNone", []> {
let PrintMethod = "printAddrMode7Operand";
- let MIOperandInfo = (ops GPR);
- let ParserMatchClass = MemMode7AsmOperand;
+ let DecoderMethod = "DecodeAddrMode7Operand";
+ let ParserMatchClass = MemNoOffsetAsmOperand;
+ let MIOperandInfo = (ops GPR:$base);
}
def nohash_imm : Operand<i32> {
@@ -687,25 +826,30 @@ def nohash_imm : Operand<i32> {
def CoprocNumAsmOperand : AsmOperandClass {
let Name = "CoprocNum";
- let SuperClasses = [];
- let ParserMethod = "tryParseCoprocNumOperand";
-}
-
-def CoprocRegAsmOperand : AsmOperandClass {
- let Name = "CoprocReg";
- let SuperClasses = [];
- let ParserMethod = "tryParseCoprocRegOperand";
+ let ParserMethod = "parseCoprocNumOperand";
}
-
def p_imm : Operand<i32> {
let PrintMethod = "printPImmediate";
let ParserMatchClass = CoprocNumAsmOperand;
+ let DecoderMethod = "DecodeCoprocessor";
}
+def CoprocRegAsmOperand : AsmOperandClass {
+ let Name = "CoprocReg";
+ let ParserMethod = "parseCoprocRegOperand";
+}
def c_imm : Operand<i32> {
let PrintMethod = "printCImmediate";
let ParserMatchClass = CoprocRegAsmOperand;
}
+def CoprocOptionAsmOperand : AsmOperandClass {
+ let Name = "CoprocOption";
+ let ParserMethod = "parseCoprocOptionOperand";
+}
+def coproc_option_imm : Operand<i32> {
+ let PrintMethod = "printCoprocOptionImm";
+ let ParserMatchClass = CoprocOptionAsmOperand;
+}
//===----------------------------------------------------------------------===//
@@ -748,16 +892,37 @@ multiclass AsI1_bin_irs<bits<4> opcod, string opc,
let Inst{11-4} = 0b00000000;
let Inst{3-0} = Rm;
}
- def rs : AsI1<opcod, (outs GPR:$Rd), (ins GPR:$Rn, so_reg:$shift), DPSoRegFrm,
+
+ def rsi : AsI1<opcod, (outs GPR:$Rd),
+ (ins GPR:$Rn, so_reg_imm:$shift), DPSoRegImmFrm,
+ iis, opc, "\t$Rd, $Rn, $shift",
+ [(set GPR:$Rd, (opnode GPR:$Rn, so_reg_imm:$shift))]> {
+ bits<4> Rd;
+ bits<4> Rn;
+ bits<12> shift;
+ let Inst{25} = 0;
+ let Inst{19-16} = Rn;
+ let Inst{15-12} = Rd;
+ let Inst{11-5} = shift{11-5};
+ let Inst{4} = 0;
+ let Inst{3-0} = shift{3-0};
+ }
+
+ def rsr : AsI1<opcod, (outs GPR:$Rd),
+ (ins GPR:$Rn, so_reg_reg:$shift), DPSoRegRegFrm,
iis, opc, "\t$Rd, $Rn, $shift",
- [(set GPR:$Rd, (opnode GPR:$Rn, so_reg:$shift))]> {
+ [(set GPR:$Rd, (opnode GPR:$Rn, so_reg_reg:$shift))]> {
bits<4> Rd;
bits<4> Rn;
bits<12> shift;
let Inst{25} = 0;
let Inst{19-16} = Rn;
let Inst{15-12} = Rd;
- let Inst{11-0} = shift;
+ let Inst{11-8} = shift{11-8};
+ let Inst{7} = 0;
+ let Inst{6-5} = shift{6-5};
+ let Inst{4} = 1;
+ let Inst{3-0} = shift{3-0};
}
// Assembly aliases for optional destination operand when it's the same
@@ -773,56 +938,172 @@ multiclass AsI1_bin_irs<bits<4> opcod, string opc,
cc_out:$s)>,
Requires<[IsARM]>;
def : InstAlias<!strconcat(opc, "${s}${p} $Rdn, $shift"),
- (!cast<Instruction>(!strconcat(baseOpc, "rs")) GPR:$Rdn, GPR:$Rdn,
- so_reg:$shift, pred:$p,
+ (!cast<Instruction>(!strconcat(baseOpc, "rsi")) GPR:$Rdn, GPR:$Rdn,
+ so_reg_imm:$shift, pred:$p,
cc_out:$s)>,
Requires<[IsARM]>;
+ def : InstAlias<!strconcat(opc, "${s}${p} $Rdn, $shift"),
+ (!cast<Instruction>(!strconcat(baseOpc, "rsr")) GPR:$Rdn, GPR:$Rdn,
+ so_reg_reg:$shift, pred:$p,
+ cc_out:$s)>,
+ Requires<[IsARM]>;
+
}
-/// AI1_bin_s_irs - Similar to AsI1_bin_irs except it sets the 's' bit so the
-/// instruction modifies the CPSR register.
-let isCodeGenOnly = 1, Defs = [CPSR] in {
-multiclass AI1_bin_s_irs<bits<4> opcod, string opc,
+/// AsI1_rbin_irs - Same as AsI1_bin_irs except the order of operands are
+/// reversed. The 'rr' form is only defined for the disassembler; for codegen
+/// it is equivalent to the AsI1_bin_irs counterpart.
+multiclass AsI1_rbin_irs<bits<4> opcod, string opc,
InstrItinClass iii, InstrItinClass iir, InstrItinClass iis,
- PatFrag opnode, bit Commutable = 0> {
- def ri : AI1<opcod, (outs GPR:$Rd), (ins GPR:$Rn, so_imm:$imm), DPFrm,
+ PatFrag opnode, string baseOpc, bit Commutable = 0> {
+ // The register-immediate version is re-materializable. This is useful
+ // in particular for taking the address of a local.
+ let isReMaterializable = 1 in {
+ def ri : AsI1<opcod, (outs GPR:$Rd), (ins GPR:$Rn, so_imm:$imm), DPFrm,
iii, opc, "\t$Rd, $Rn, $imm",
- [(set GPR:$Rd, (opnode GPR:$Rn, so_imm:$imm))]> {
+ [(set GPR:$Rd, (opnode so_imm:$imm, GPR:$Rn))]> {
bits<4> Rd;
bits<4> Rn;
bits<12> imm;
let Inst{25} = 1;
- let Inst{20} = 1;
let Inst{19-16} = Rn;
let Inst{15-12} = Rd;
let Inst{11-0} = imm;
}
- def rr : AI1<opcod, (outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm), DPFrm,
+ }
+ def rr : AsI1<opcod, (outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm), DPFrm,
iir, opc, "\t$Rd, $Rn, $Rm",
- [(set GPR:$Rd, (opnode GPR:$Rn, GPR:$Rm))]> {
+ [/* pattern left blank */]> {
bits<4> Rd;
bits<4> Rn;
bits<4> Rm;
- let isCommutable = Commutable;
+ let Inst{11-4} = 0b00000000;
+ let Inst{25} = 0;
+ let Inst{3-0} = Rm;
+ let Inst{15-12} = Rd;
+ let Inst{19-16} = Rn;
+ }
+
+ def rsi : AsI1<opcod, (outs GPR:$Rd),
+ (ins GPR:$Rn, so_reg_imm:$shift), DPSoRegImmFrm,
+ iis, opc, "\t$Rd, $Rn, $shift",
+ [(set GPR:$Rd, (opnode so_reg_imm:$shift, GPR:$Rn))]> {
+ bits<4> Rd;
+ bits<4> Rn;
+ bits<12> shift;
let Inst{25} = 0;
- let Inst{20} = 1;
let Inst{19-16} = Rn;
let Inst{15-12} = Rd;
- let Inst{11-4} = 0b00000000;
- let Inst{3-0} = Rm;
+ let Inst{11-5} = shift{11-5};
+ let Inst{4} = 0;
+ let Inst{3-0} = shift{3-0};
}
- def rs : AI1<opcod, (outs GPR:$Rd), (ins GPR:$Rn, so_reg:$shift), DPSoRegFrm,
+
+ def rsr : AsI1<opcod, (outs GPR:$Rd),
+ (ins GPR:$Rn, so_reg_reg:$shift), DPSoRegRegFrm,
iis, opc, "\t$Rd, $Rn, $shift",
- [(set GPR:$Rd, (opnode GPR:$Rn, so_reg:$shift))]> {
+ [(set GPR:$Rd, (opnode so_reg_reg:$shift, GPR:$Rn))]> {
bits<4> Rd;
bits<4> Rn;
bits<12> shift;
let Inst{25} = 0;
- let Inst{20} = 1;
let Inst{19-16} = Rn;
let Inst{15-12} = Rd;
- let Inst{11-0} = shift;
+ let Inst{11-8} = shift{11-8};
+ let Inst{7} = 0;
+ let Inst{6-5} = shift{6-5};
+ let Inst{4} = 1;
+ let Inst{3-0} = shift{3-0};
}
+
+ // Assembly aliases for optional destination operand when it's the same
+ // as the source operand.
+ def : InstAlias<!strconcat(opc, "${s}${p} $Rdn, $imm"),
+ (!cast<Instruction>(!strconcat(baseOpc, "ri")) GPR:$Rdn, GPR:$Rdn,
+ so_imm:$imm, pred:$p,
+ cc_out:$s)>,
+ Requires<[IsARM]>;
+ def : InstAlias<!strconcat(opc, "${s}${p} $Rdn, $Rm"),
+ (!cast<Instruction>(!strconcat(baseOpc, "rr")) GPR:$Rdn, GPR:$Rdn,
+ GPR:$Rm, pred:$p,
+ cc_out:$s)>,
+ Requires<[IsARM]>;
+ def : InstAlias<!strconcat(opc, "${s}${p} $Rdn, $shift"),
+ (!cast<Instruction>(!strconcat(baseOpc, "rsi")) GPR:$Rdn, GPR:$Rdn,
+ so_reg_imm:$shift, pred:$p,
+ cc_out:$s)>,
+ Requires<[IsARM]>;
+ def : InstAlias<!strconcat(opc, "${s}${p} $Rdn, $shift"),
+ (!cast<Instruction>(!strconcat(baseOpc, "rsr")) GPR:$Rdn, GPR:$Rdn,
+ so_reg_reg:$shift, pred:$p,
+ cc_out:$s)>,
+ Requires<[IsARM]>;
+
+}
+
+/// AsI1_rbin_s_is - Same as AsI1_rbin_s_is except it sets 's' bit by default.
+///
+/// These opcodes will be converted to the real non-S opcodes by
+/// AdjustInstrPostInstrSelection after giving then an optional CPSR operand.
+let hasPostISelHook = 1, isCodeGenOnly = 1, isPseudo = 1, Defs = [CPSR] in {
+multiclass AsI1_rbin_s_is<bits<4> opcod, string opc,
+ InstrItinClass iii, InstrItinClass iir, InstrItinClass iis,
+ PatFrag opnode, bit Commutable = 0> {
+ def ri : AsI1<opcod, (outs GPR:$Rd), (ins GPR:$Rn, so_imm:$imm), DPFrm,
+ iii, opc, "\t$Rd, $Rn, $imm",
+ [(set GPR:$Rd, CPSR, (opnode so_imm:$imm, GPR:$Rn))]>;
+
+ def rr : AsI1<opcod, (outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm), DPFrm,
+ iir, opc, "\t$Rd, $Rn, $Rm",
+ [/* pattern left blank */]>;
+
+ def rsi : AsI1<opcod, (outs GPR:$Rd),
+ (ins GPR:$Rn, so_reg_imm:$shift), DPSoRegImmFrm,
+ iis, opc, "\t$Rd, $Rn, $shift",
+ [(set GPR:$Rd, CPSR, (opnode so_reg_imm:$shift, GPR:$Rn))]>;
+
+ def rsr : AsI1<opcod, (outs GPR:$Rd),
+ (ins GPR:$Rn, so_reg_reg:$shift), DPSoRegRegFrm,
+ iis, opc, "\t$Rd, $Rn, $shift",
+ [(set GPR:$Rd, CPSR, (opnode so_reg_reg:$shift, GPR:$Rn))]> {
+ bits<4> Rd;
+ bits<4> Rn;
+ bits<12> shift;
+ let Inst{25} = 0;
+ let Inst{19-16} = Rn;
+ let Inst{15-12} = Rd;
+ let Inst{11-8} = shift{11-8};
+ let Inst{7} = 0;
+ let Inst{6-5} = shift{6-5};
+ let Inst{4} = 1;
+ let Inst{3-0} = shift{3-0};
+ }
+}
+}
+
+/// AsI1_bin_s_irs - Same as AsI1_bin_irs except it sets the 's' bit by default.
+///
+/// These opcodes will be converted to the real non-S opcodes by
+/// AdjustInstrPostInstrSelection after giving then an optional CPSR operand.
+let hasPostISelHook = 1, isCodeGenOnly = 1, isPseudo = 1, Defs = [CPSR] in {
+multiclass AsI1_bin_s_irs<bits<4> opcod, string opc,
+ InstrItinClass iii, InstrItinClass iir, InstrItinClass iis,
+ PatFrag opnode, bit Commutable = 0> {
+ def ri : AsI1<opcod, (outs GPR:$Rd), (ins GPR:$Rn, so_imm:$imm), DPFrm,
+ iii, opc, "\t$Rd, $Rn, $imm",
+ [(set GPR:$Rd, CPSR, (opnode GPR:$Rn, so_imm:$imm))]>;
+ def rr : AsI1<opcod, (outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm), DPFrm,
+ iir, opc, "\t$Rd, $Rn, $Rm",
+ [(set GPR:$Rd, CPSR, (opnode GPR:$Rn, GPR:$Rm))]>;
+ def rsi : AsI1<opcod, (outs GPR:$Rd),
+ (ins GPR:$Rn, so_reg_imm:$shift), DPSoRegImmFrm,
+ iis, opc, "\t$Rd, $Rn, $shift",
+ [(set GPR:$Rd, CPSR, (opnode GPR:$Rn, so_reg_imm:$shift))]>;
+
+ def rsr : AsI1<opcod, (outs GPR:$Rd),
+ (ins GPR:$Rn, so_reg_reg:$shift), DPSoRegRegFrm,
+ iis, opc, "\t$Rd, $Rn, $shift",
+ [(set GPR:$Rd, CPSR, (opnode GPR:$Rn, so_reg_reg:$shift))]>;
}
}
@@ -857,128 +1138,190 @@ multiclass AI1_cmp_irs<bits<4> opcod, string opc,
let Inst{11-4} = 0b00000000;
let Inst{3-0} = Rm;
}
- def rs : AI1<opcod, (outs), (ins GPR:$Rn, so_reg:$shift), DPSoRegFrm, iis,
+ def rsi : AI1<opcod, (outs),
+ (ins GPR:$Rn, so_reg_imm:$shift), DPSoRegImmFrm, iis,
+ opc, "\t$Rn, $shift",
+ [(opnode GPR:$Rn, so_reg_imm:$shift)]> {
+ bits<4> Rn;
+ bits<12> shift;
+ let Inst{25} = 0;
+ let Inst{20} = 1;
+ let Inst{19-16} = Rn;
+ let Inst{15-12} = 0b0000;
+ let Inst{11-5} = shift{11-5};
+ let Inst{4} = 0;
+ let Inst{3-0} = shift{3-0};
+ }
+ def rsr : AI1<opcod, (outs),
+ (ins GPR:$Rn, so_reg_reg:$shift), DPSoRegRegFrm, iis,
opc, "\t$Rn, $shift",
- [(opnode GPR:$Rn, so_reg:$shift)]> {
+ [(opnode GPR:$Rn, so_reg_reg:$shift)]> {
bits<4> Rn;
bits<12> shift;
let Inst{25} = 0;
let Inst{20} = 1;
let Inst{19-16} = Rn;
let Inst{15-12} = 0b0000;
- let Inst{11-0} = shift;
+ let Inst{11-8} = shift{11-8};
+ let Inst{7} = 0;
+ let Inst{6-5} = shift{6-5};
+ let Inst{4} = 1;
+ let Inst{3-0} = shift{3-0};
}
+
}
}
/// AI_ext_rrot - A unary operation with two forms: one whose operand is a
/// register and one whose operand is a register rotated by 8/16/24.
/// FIXME: Remove the 'r' variant. Its rot_imm is zero.
-multiclass AI_ext_rrot<bits<8> opcod, string opc, PatFrag opnode> {
- def r : AExtI<opcod, (outs GPR:$Rd), (ins GPR:$Rm),
- IIC_iEXTr, opc, "\t$Rd, $Rm",
- [(set GPR:$Rd, (opnode GPR:$Rm))]>,
- Requires<[IsARM, HasV6]> {
- bits<4> Rd;
- bits<4> Rm;
- let Inst{19-16} = 0b1111;
- let Inst{15-12} = Rd;
- let Inst{11-10} = 0b00;
- let Inst{3-0} = Rm;
- }
- def r_rot : AExtI<opcod, (outs GPR:$Rd), (ins GPR:$Rm, rot_imm:$rot),
- IIC_iEXTr, opc, "\t$Rd, $Rm, ror $rot",
- [(set GPR:$Rd, (opnode (rotr GPR:$Rm, rot_imm:$rot)))]>,
- Requires<[IsARM, HasV6]> {
- bits<4> Rd;
- bits<4> Rm;
- bits<2> rot;
- let Inst{19-16} = 0b1111;
- let Inst{15-12} = Rd;
- let Inst{11-10} = rot;
- let Inst{3-0} = Rm;
- }
+class AI_ext_rrot<bits<8> opcod, string opc, PatFrag opnode>
+ : AExtI<opcod, (outs GPRnopc:$Rd), (ins GPRnopc:$Rm, rot_imm:$rot),
+ IIC_iEXTr, opc, "\t$Rd, $Rm$rot",
+ [(set GPRnopc:$Rd, (opnode (rotr GPRnopc:$Rm, rot_imm:$rot)))]>,
+ Requires<[IsARM, HasV6]> {
+ bits<4> Rd;
+ bits<4> Rm;
+ bits<2> rot;
+ let Inst{19-16} = 0b1111;
+ let Inst{15-12} = Rd;
+ let Inst{11-10} = rot;
+ let Inst{3-0} = Rm;
}
-multiclass AI_ext_rrot_np<bits<8> opcod, string opc> {
- def r : AExtI<opcod, (outs GPR:$Rd), (ins GPR:$Rm),
- IIC_iEXTr, opc, "\t$Rd, $Rm",
- [/* For disassembly only; pattern left blank */]>,
- Requires<[IsARM, HasV6]> {
- let Inst{19-16} = 0b1111;
- let Inst{11-10} = 0b00;
- }
- def r_rot : AExtI<opcod, (outs GPR:$Rd), (ins GPR:$Rm, rot_imm:$rot),
- IIC_iEXTr, opc, "\t$Rd, $Rm, ror $rot",
- [/* For disassembly only; pattern left blank */]>,
- Requires<[IsARM, HasV6]> {
- bits<2> rot;
- let Inst{19-16} = 0b1111;
- let Inst{11-10} = rot;
- }
+class AI_ext_rrot_np<bits<8> opcod, string opc>
+ : AExtI<opcod, (outs GPRnopc:$Rd), (ins GPRnopc:$Rm, rot_imm:$rot),
+ IIC_iEXTr, opc, "\t$Rd, $Rm$rot", []>,
+ Requires<[IsARM, HasV6]> {
+ bits<2> rot;
+ let Inst{19-16} = 0b1111;
+ let Inst{11-10} = rot;
}
/// AI_exta_rrot - A binary operation with two forms: one whose operand is a
/// register and one whose operand is a register rotated by 8/16/24.
-multiclass AI_exta_rrot<bits<8> opcod, string opc, PatFrag opnode> {
- def rr : AExtI<opcod, (outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm),
- IIC_iEXTAr, opc, "\t$Rd, $Rn, $Rm",
- [(set GPR:$Rd, (opnode GPR:$Rn, GPR:$Rm))]>,
- Requires<[IsARM, HasV6]> {
+class AI_exta_rrot<bits<8> opcod, string opc, PatFrag opnode>
+ : AExtI<opcod, (outs GPRnopc:$Rd), (ins GPR:$Rn, GPRnopc:$Rm, rot_imm:$rot),
+ IIC_iEXTAr, opc, "\t$Rd, $Rn, $Rm$rot",
+ [(set GPRnopc:$Rd, (opnode GPR:$Rn,
+ (rotr GPRnopc:$Rm, rot_imm:$rot)))]>,
+ Requires<[IsARM, HasV6]> {
+ bits<4> Rd;
+ bits<4> Rm;
+ bits<4> Rn;
+ bits<2> rot;
+ let Inst{19-16} = Rn;
+ let Inst{15-12} = Rd;
+ let Inst{11-10} = rot;
+ let Inst{9-4} = 0b000111;
+ let Inst{3-0} = Rm;
+}
+
+class AI_exta_rrot_np<bits<8> opcod, string opc>
+ : AExtI<opcod, (outs GPRnopc:$Rd), (ins GPR:$Rn, GPRnopc:$Rm, rot_imm:$rot),
+ IIC_iEXTAr, opc, "\t$Rd, $Rn, $Rm$rot", []>,
+ Requires<[IsARM, HasV6]> {
+ bits<4> Rn;
+ bits<2> rot;
+ let Inst{19-16} = Rn;
+ let Inst{11-10} = rot;
+}
+
+/// AI1_adde_sube_irs - Define instructions and patterns for adde and sube.
+multiclass AI1_adde_sube_irs<bits<4> opcod, string opc, PatFrag opnode,
+ string baseOpc, bit Commutable = 0> {
+ let hasPostISelHook = 1, Defs = [CPSR], Uses = [CPSR] in {
+ def ri : AsI1<opcod, (outs GPR:$Rd), (ins GPR:$Rn, so_imm:$imm),
+ DPFrm, IIC_iALUi, opc, "\t$Rd, $Rn, $imm",
+ [(set GPR:$Rd, CPSR, (opnode GPR:$Rn, so_imm:$imm, CPSR))]>,
+ Requires<[IsARM]> {
bits<4> Rd;
- bits<4> Rm;
bits<4> Rn;
- let Inst{19-16} = Rn;
+ bits<12> imm;
+ let Inst{25} = 1;
let Inst{15-12} = Rd;
- let Inst{11-10} = 0b00;
- let Inst{9-4} = 0b000111;
- let Inst{3-0} = Rm;
- }
- def rr_rot : AExtI<opcod, (outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm,
- rot_imm:$rot),
- IIC_iEXTAr, opc, "\t$Rd, $Rn, $Rm, ror $rot",
- [(set GPR:$Rd, (opnode GPR:$Rn,
- (rotr GPR:$Rm, rot_imm:$rot)))]>,
- Requires<[IsARM, HasV6]> {
+ let Inst{19-16} = Rn;
+ let Inst{11-0} = imm;
+ }
+ def rr : AsI1<opcod, (outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm),
+ DPFrm, IIC_iALUr, opc, "\t$Rd, $Rn, $Rm",
+ [(set GPR:$Rd, CPSR, (opnode GPR:$Rn, GPR:$Rm, CPSR))]>,
+ Requires<[IsARM]> {
bits<4> Rd;
+ bits<4> Rn;
bits<4> Rm;
+ let Inst{11-4} = 0b00000000;
+ let Inst{25} = 0;
+ let isCommutable = Commutable;
+ let Inst{3-0} = Rm;
+ let Inst{15-12} = Rd;
+ let Inst{19-16} = Rn;
+ }
+ def rsi : AsI1<opcod, (outs GPR:$Rd),
+ (ins GPR:$Rn, so_reg_imm:$shift),
+ DPSoRegImmFrm, IIC_iALUsr, opc, "\t$Rd, $Rn, $shift",
+ [(set GPR:$Rd, CPSR, (opnode GPR:$Rn, so_reg_imm:$shift, CPSR))]>,
+ Requires<[IsARM]> {
+ bits<4> Rd;
bits<4> Rn;
- bits<2> rot;
+ bits<12> shift;
+ let Inst{25} = 0;
let Inst{19-16} = Rn;
let Inst{15-12} = Rd;
- let Inst{11-10} = rot;
- let Inst{9-4} = 0b000111;
- let Inst{3-0} = Rm;
+ let Inst{11-5} = shift{11-5};
+ let Inst{4} = 0;
+ let Inst{3-0} = shift{3-0};
}
-}
-
-// For disassembly only.
-multiclass AI_exta_rrot_np<bits<8> opcod, string opc> {
- def rr : AExtI<opcod, (outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm),
- IIC_iEXTAr, opc, "\t$Rd, $Rn, $Rm",
- [/* For disassembly only; pattern left blank */]>,
- Requires<[IsARM, HasV6]> {
- let Inst{11-10} = 0b00;
- }
- def rr_rot : AExtI<opcod, (outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm,
- rot_imm:$rot),
- IIC_iEXTAr, opc, "\t$Rd, $Rn, $Rm, ror $rot",
- [/* For disassembly only; pattern left blank */]>,
- Requires<[IsARM, HasV6]> {
+ def rsr : AsI1<opcod, (outs GPR:$Rd),
+ (ins GPR:$Rn, so_reg_reg:$shift),
+ DPSoRegRegFrm, IIC_iALUsr, opc, "\t$Rd, $Rn, $shift",
+ [(set GPR:$Rd, CPSR, (opnode GPR:$Rn, so_reg_reg:$shift, CPSR))]>,
+ Requires<[IsARM]> {
+ bits<4> Rd;
bits<4> Rn;
- bits<2> rot;
+ bits<12> shift;
+ let Inst{25} = 0;
let Inst{19-16} = Rn;
- let Inst{11-10} = rot;
+ let Inst{15-12} = Rd;
+ let Inst{11-8} = shift{11-8};
+ let Inst{7} = 0;
+ let Inst{6-5} = shift{6-5};
+ let Inst{4} = 1;
+ let Inst{3-0} = shift{3-0};
+ }
}
+
+ // Assembly aliases for optional destination operand when it's the same
+ // as the source operand.
+ def : InstAlias<!strconcat(opc, "${s}${p} $Rdn, $imm"),
+ (!cast<Instruction>(!strconcat(baseOpc, "ri")) GPR:$Rdn, GPR:$Rdn,
+ so_imm:$imm, pred:$p,
+ cc_out:$s)>,
+ Requires<[IsARM]>;
+ def : InstAlias<!strconcat(opc, "${s}${p} $Rdn, $Rm"),
+ (!cast<Instruction>(!strconcat(baseOpc, "rr")) GPR:$Rdn, GPR:$Rdn,
+ GPR:$Rm, pred:$p,
+ cc_out:$s)>,
+ Requires<[IsARM]>;
+ def : InstAlias<!strconcat(opc, "${s}${p} $Rdn, $shift"),
+ (!cast<Instruction>(!strconcat(baseOpc, "rsi")) GPR:$Rdn, GPR:$Rdn,
+ so_reg_imm:$shift, pred:$p,
+ cc_out:$s)>,
+ Requires<[IsARM]>;
+ def : InstAlias<!strconcat(opc, "${s}${p} $Rdn, $shift"),
+ (!cast<Instruction>(!strconcat(baseOpc, "rsr")) GPR:$Rdn, GPR:$Rdn,
+ so_reg_reg:$shift, pred:$p,
+ cc_out:$s)>,
+ Requires<[IsARM]>;
}
-/// AI1_adde_sube_irs - Define instructions and patterns for adde and sube.
-multiclass AI1_adde_sube_irs<bits<4> opcod, string opc, PatFrag opnode,
- string baseOpc, bit Commutable = 0> {
- let Uses = [CPSR] in {
+/// AI1_rsc_irs - Define instructions and patterns for rsc
+multiclass AI1_rsc_irs<bits<4> opcod, string opc, PatFrag opnode,
+ string baseOpc> {
+ let hasPostISelHook = 1, Defs = [CPSR], Uses = [CPSR] in {
def ri : AsI1<opcod, (outs GPR:$Rd), (ins GPR:$Rn, so_imm:$imm),
DPFrm, IIC_iALUi, opc, "\t$Rd, $Rn, $imm",
- [(set GPR:$Rd, (opnode GPR:$Rn, so_imm:$imm))]>,
+ [(set GPR:$Rd, CPSR, (opnode so_imm:$imm, GPR:$Rn, CPSR))]>,
Requires<[IsARM]> {
bits<4> Rd;
bits<4> Rn;
@@ -990,31 +1333,48 @@ multiclass AI1_adde_sube_irs<bits<4> opcod, string opc, PatFrag opnode,
}
def rr : AsI1<opcod, (outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm),
DPFrm, IIC_iALUr, opc, "\t$Rd, $Rn, $Rm",
- [(set GPR:$Rd, (opnode GPR:$Rn, GPR:$Rm))]>,
- Requires<[IsARM]> {
+ [/* pattern left blank */]> {
bits<4> Rd;
bits<4> Rn;
bits<4> Rm;
let Inst{11-4} = 0b00000000;
let Inst{25} = 0;
- let isCommutable = Commutable;
let Inst{3-0} = Rm;
let Inst{15-12} = Rd;
let Inst{19-16} = Rn;
}
- def rs : AsI1<opcod, (outs GPR:$Rd), (ins GPR:$Rn, so_reg:$shift),
- DPSoRegFrm, IIC_iALUsr, opc, "\t$Rd, $Rn, $shift",
- [(set GPR:$Rd, (opnode GPR:$Rn, so_reg:$shift))]>,
+ def rsi : AsI1<opcod, (outs GPR:$Rd), (ins GPR:$Rn, so_reg_imm:$shift),
+ DPSoRegImmFrm, IIC_iALUsr, opc, "\t$Rd, $Rn, $shift",
+ [(set GPR:$Rd, CPSR, (opnode so_reg_imm:$shift, GPR:$Rn, CPSR))]>,
Requires<[IsARM]> {
bits<4> Rd;
bits<4> Rn;
bits<12> shift;
let Inst{25} = 0;
- let Inst{11-0} = shift;
+ let Inst{19-16} = Rn;
let Inst{15-12} = Rd;
+ let Inst{11-5} = shift{11-5};
+ let Inst{4} = 0;
+ let Inst{3-0} = shift{3-0};
+ }
+ def rsr : AsI1<opcod, (outs GPR:$Rd), (ins GPR:$Rn, so_reg_reg:$shift),
+ DPSoRegRegFrm, IIC_iALUsr, opc, "\t$Rd, $Rn, $shift",
+ [(set GPR:$Rd, CPSR, (opnode so_reg_reg:$shift, GPR:$Rn, CPSR))]>,
+ Requires<[IsARM]> {
+ bits<4> Rd;
+ bits<4> Rn;
+ bits<12> shift;
+ let Inst{25} = 0;
let Inst{19-16} = Rn;
+ let Inst{15-12} = Rd;
+ let Inst{11-8} = shift{11-8};
+ let Inst{7} = 0;
+ let Inst{6-5} = shift{6-5};
+ let Inst{4} = 1;
+ let Inst{3-0} = shift{3-0};
}
}
+
// Assembly aliases for optional destination operand when it's the same
// as the source operand.
def : InstAlias<!strconcat(opc, "${s}${p} $Rdn, $imm"),
@@ -1028,28 +1388,15 @@ multiclass AI1_adde_sube_irs<bits<4> opcod, string opc, PatFrag opnode,
cc_out:$s)>,
Requires<[IsARM]>;
def : InstAlias<!strconcat(opc, "${s}${p} $Rdn, $shift"),
- (!cast<Instruction>(!strconcat(baseOpc, "rs")) GPR:$Rdn, GPR:$Rdn,
- so_reg:$shift, pred:$p,
+ (!cast<Instruction>(!strconcat(baseOpc, "rsi")) GPR:$Rdn, GPR:$Rdn,
+ so_reg_imm:$shift, pred:$p,
+ cc_out:$s)>,
+ Requires<[IsARM]>;
+ def : InstAlias<!strconcat(opc, "${s}${p} $Rdn, $shift"),
+ (!cast<Instruction>(!strconcat(baseOpc, "rsr")) GPR:$Rdn, GPR:$Rdn,
+ so_reg_reg:$shift, pred:$p,
cc_out:$s)>,
Requires<[IsARM]>;
-}
-
-// Carry setting variants
-// NOTE: CPSR def omitted because it will be handled by the custom inserter.
-let usesCustomInserter = 1 in {
-multiclass AI1_adde_sube_s_irs<PatFrag opnode, bit Commutable = 0> {
- def ri : ARMPseudoInst<(outs GPR:$Rd), (ins GPR:$Rn, so_imm:$imm),
- 4, IIC_iALUi,
- [(set GPR:$Rd, (opnode GPR:$Rn, so_imm:$imm))]>;
- def rr : ARMPseudoInst<(outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm),
- 4, IIC_iALUr,
- [(set GPR:$Rd, (opnode GPR:$Rn, GPR:$Rm))]> {
- let isCommutable = Commutable;
- }
- def rs : ARMPseudoInst<(outs GPR:$Rd), (ins GPR:$Rn, so_reg:$shift),
- 4, IIC_iALUsr,
- [(set GPR:$Rd, (opnode GPR:$Rn, so_reg:$shift))]>;
-}
}
let canFoldAsLoad = 1, isReMaterializable = 1 in {
@@ -1082,6 +1429,37 @@ multiclass AI_ldr1<bit isByte, string opc, InstrItinClass iii,
}
}
+let canFoldAsLoad = 1, isReMaterializable = 1 in {
+multiclass AI_ldr1nopc<bit isByte, string opc, InstrItinClass iii,
+ InstrItinClass iir, PatFrag opnode> {
+ // Note: We use the complex addrmode_imm12 rather than just an input
+ // GPR and a constrained immediate so that we can use this to match
+ // frame index references and avoid matching constant pool references.
+ def i12: AI2ldst<0b010, 1, isByte, (outs GPRnopc:$Rt), (ins addrmode_imm12:$addr),
+ AddrMode_i12, LdFrm, iii, opc, "\t$Rt, $addr",
+ [(set GPRnopc:$Rt, (opnode addrmode_imm12:$addr))]> {
+ bits<4> Rt;
+ bits<17> addr;
+ let Inst{23} = addr{12}; // U (add = ('U' == 1))
+ let Inst{19-16} = addr{16-13}; // Rn
+ let Inst{15-12} = Rt;
+ let Inst{11-0} = addr{11-0}; // imm12
+ }
+ def rs : AI2ldst<0b011, 1, isByte, (outs GPRnopc:$Rt), (ins ldst_so_reg:$shift),
+ AddrModeNone, LdFrm, iir, opc, "\t$Rt, $shift",
+ [(set GPRnopc:$Rt, (opnode ldst_so_reg:$shift))]> {
+ bits<4> Rt;
+ bits<17> shift;
+ let shift{4} = 0; // Inst{4} = 0
+ let Inst{23} = shift{12}; // U (add = ('U' == 1))
+ let Inst{19-16} = shift{16-13}; // Rn
+ let Inst{15-12} = Rt;
+ let Inst{11-0} = shift{11-0};
+ }
+}
+}
+
+
multiclass AI_str1<bit isByte, string opc, InstrItinClass iii,
InstrItinClass iir, PatFrag opnode> {
// Note: We use the complex addrmode_imm12 rather than just an input
@@ -1110,6 +1488,37 @@ multiclass AI_str1<bit isByte, string opc, InstrItinClass iii,
let Inst{11-0} = shift{11-0};
}
}
+
+multiclass AI_str1nopc<bit isByte, string opc, InstrItinClass iii,
+ InstrItinClass iir, PatFrag opnode> {
+ // Note: We use the complex addrmode_imm12 rather than just an input
+ // GPR and a constrained immediate so that we can use this to match
+ // frame index references and avoid matching constant pool references.
+ def i12 : AI2ldst<0b010, 0, isByte, (outs),
+ (ins GPRnopc:$Rt, addrmode_imm12:$addr),
+ AddrMode_i12, StFrm, iii, opc, "\t$Rt, $addr",
+ [(opnode GPRnopc:$Rt, addrmode_imm12:$addr)]> {
+ bits<4> Rt;
+ bits<17> addr;
+ let Inst{23} = addr{12}; // U (add = ('U' == 1))
+ let Inst{19-16} = addr{16-13}; // Rn
+ let Inst{15-12} = Rt;
+ let Inst{11-0} = addr{11-0}; // imm12
+ }
+ def rs : AI2ldst<0b011, 0, isByte, (outs), (ins GPRnopc:$Rt, ldst_so_reg:$shift),
+ AddrModeNone, StFrm, iir, opc, "\t$Rt, $shift",
+ [(opnode GPRnopc:$Rt, ldst_so_reg:$shift)]> {
+ bits<4> Rt;
+ bits<17> shift;
+ let shift{4} = 0; // Inst{4} = 0
+ let Inst{23} = shift{12}; // U (add = ('U' == 1))
+ let Inst{19-16} = shift{16-13}; // Rn
+ let Inst{15-12} = Rt;
+ let Inst{11-0} = shift{11-0};
+ }
+}
+
+
//===----------------------------------------------------------------------===//
// Instructions
//===----------------------------------------------------------------------===//
@@ -1140,42 +1549,66 @@ PseudoInst<(outs), (ins i32imm:$amt, pred:$p), NoItinerary,
[(ARMcallseq_start timm:$amt)]>;
}
-def NOP : AI<(outs), (ins), MiscFrm, NoItinerary, "nop", "",
- [/* For disassembly only; pattern left blank */]>,
+// Atomic pseudo-insts which will be lowered to ldrexd/strexd loops.
+// (These psuedos use a hand-written selection code).
+let usesCustomInserter = 1, Defs = [CPSR], mayLoad = 1, mayStore = 1 in {
+def ATOMOR6432 : PseudoInst<(outs GPR:$dst1, GPR:$dst2),
+ (ins GPR:$addr, GPR:$src1, GPR:$src2),
+ NoItinerary, []>;
+def ATOMXOR6432 : PseudoInst<(outs GPR:$dst1, GPR:$dst2),
+ (ins GPR:$addr, GPR:$src1, GPR:$src2),
+ NoItinerary, []>;
+def ATOMADD6432 : PseudoInst<(outs GPR:$dst1, GPR:$dst2),
+ (ins GPR:$addr, GPR:$src1, GPR:$src2),
+ NoItinerary, []>;
+def ATOMSUB6432 : PseudoInst<(outs GPR:$dst1, GPR:$dst2),
+ (ins GPR:$addr, GPR:$src1, GPR:$src2),
+ NoItinerary, []>;
+def ATOMNAND6432 : PseudoInst<(outs GPR:$dst1, GPR:$dst2),
+ (ins GPR:$addr, GPR:$src1, GPR:$src2),
+ NoItinerary, []>;
+def ATOMAND6432 : PseudoInst<(outs GPR:$dst1, GPR:$dst2),
+ (ins GPR:$addr, GPR:$src1, GPR:$src2),
+ NoItinerary, []>;
+def ATOMSWAP6432 : PseudoInst<(outs GPR:$dst1, GPR:$dst2),
+ (ins GPR:$addr, GPR:$src1, GPR:$src2),
+ NoItinerary, []>;
+def ATOMCMPXCHG6432 : PseudoInst<(outs GPR:$dst1, GPR:$dst2),
+ (ins GPR:$addr, GPR:$cmp1, GPR:$cmp2,
+ GPR:$set1, GPR:$set2),
+ NoItinerary, []>;
+}
+
+def NOP : AI<(outs), (ins), MiscFrm, NoItinerary, "nop", "", []>,
Requires<[IsARM, HasV6T2]> {
let Inst{27-16} = 0b001100100000;
let Inst{15-8} = 0b11110000;
let Inst{7-0} = 0b00000000;
}
-def YIELD : AI<(outs), (ins), MiscFrm, NoItinerary, "yield", "",
- [/* For disassembly only; pattern left blank */]>,
+def YIELD : AI<(outs), (ins), MiscFrm, NoItinerary, "yield", "", []>,
Requires<[IsARM, HasV6T2]> {
let Inst{27-16} = 0b001100100000;
let Inst{15-8} = 0b11110000;
let Inst{7-0} = 0b00000001;
}
-def WFE : AI<(outs), (ins), MiscFrm, NoItinerary, "wfe", "",
- [/* For disassembly only; pattern left blank */]>,
+def WFE : AI<(outs), (ins), MiscFrm, NoItinerary, "wfe", "", []>,
Requires<[IsARM, HasV6T2]> {
let Inst{27-16} = 0b001100100000;
let Inst{15-8} = 0b11110000;
let Inst{7-0} = 0b00000010;
}
-def WFI : AI<(outs), (ins), MiscFrm, NoItinerary, "wfi", "",
- [/* For disassembly only; pattern left blank */]>,
+def WFI : AI<(outs), (ins), MiscFrm, NoItinerary, "wfi", "", []>,
Requires<[IsARM, HasV6T2]> {
let Inst{27-16} = 0b001100100000;
let Inst{15-8} = 0b11110000;
let Inst{7-0} = 0b00000011;
}
-def SEL : AI<(outs GPR:$dst), (ins GPR:$a, GPR:$b), DPFrm, NoItinerary, "sel",
- "\t$dst, $a, $b",
- [/* For disassembly only; pattern left blank */]>,
- Requires<[IsARM, HasV6]> {
+def SEL : AI<(outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm), DPFrm, NoItinerary, "sel",
+ "\t$Rd, $Rn, $Rm", []>, Requires<[IsARM, HasV6]> {
bits<4> Rd;
bits<4> Rn;
bits<4> Rm;
@@ -1188,8 +1621,7 @@ def SEL : AI<(outs GPR:$dst), (ins GPR:$a, GPR:$b), DPFrm, NoItinerary, "sel",
}
def SEV : AI<(outs), (ins), MiscFrm, NoItinerary, "sev", "",
- [/* For disassembly only; pattern left blank */]>,
- Requires<[IsARM, HasV6T2]> {
+ []>, Requires<[IsARM, HasV6T2]> {
let Inst{27-16} = 0b001100100000;
let Inst{15-8} = 0b11110000;
let Inst{7-0} = 0b00000100;
@@ -1206,14 +1638,11 @@ def BKPT : AI<(outs), (ins imm0_65535:$val), MiscFrm, NoItinerary,
let Inst{7-4} = 0b0111;
}
-// Change Processor State is a system instruction -- for disassembly and
-// parsing only.
-// FIXME: Since the asm parser has currently no clean way to handle optional
-// operands, create 3 versions of the same instruction. Once there's a clean
-// framework to represent optional operands, change this behavior.
+// Change Processor State
+// FIXME: We should use InstAlias to handle the optional operands.
class CPS<dag iops, string asm_ops>
: AXI<(outs), iops, MiscFrm, NoItinerary, !strconcat("cps", asm_ops),
- [/* For disassembly only; pattern left blank */]>, Requires<[IsARM]> {
+ []>, Requires<[IsARM]> {
bits<2> imod;
bits<3> iflags;
bits<5> mode;
@@ -1229,17 +1658,18 @@ class CPS<dag iops, string asm_ops>
let Inst{4-0} = mode;
}
+let DecoderMethod = "DecodeCPSInstruction" in {
let M = 1 in
- def CPS3p : CPS<(ins imod_op:$imod, iflags_op:$iflags, i32imm:$mode),
+ def CPS3p : CPS<(ins imod_op:$imod, iflags_op:$iflags, imm0_31:$mode),
"$imod\t$iflags, $mode">;
let mode = 0, M = 0 in
def CPS2p : CPS<(ins imod_op:$imod, iflags_op:$iflags), "$imod\t$iflags">;
let imod = 0, iflags = 0, M = 1 in
- def CPS1p : CPS<(ins i32imm:$mode), "\t$mode">;
+ def CPS1p : CPS<(ins imm0_31:$mode), "\t$mode">;
+}
// Preload signals the memory system of possible future data/instruction access.
-// These are for disassembly only.
multiclass APreLoad<bits<1> read, bits<1> data, string opc> {
def i12 : AXI<(outs), (ins addrmode_imm12:$addr), MiscFrm, IIC_Preload,
@@ -1271,6 +1701,7 @@ multiclass APreLoad<bits<1> read, bits<1> data, string opc> {
let Inst{19-16} = shift{16-13}; // Rn
let Inst{15-12} = 0b1111;
let Inst{11-0} = shift{11-0};
+ let Inst{4} = 0;
}
}
@@ -1278,10 +1709,8 @@ defm PLD : APreLoad<1, 1, "pld">, Requires<[IsARM]>;
defm PLDW : APreLoad<0, 1, "pldw">, Requires<[IsARM,HasV7,HasMP]>;
defm PLI : APreLoad<1, 0, "pli">, Requires<[IsARM,HasV7]>;
-def SETEND : AXI<(outs),(ins setend_op:$end), MiscFrm, NoItinerary,
- "setend\t$end",
- [/* For disassembly only; pattern left blank */]>,
- Requires<[IsARM]> {
+def SETEND : AXI<(outs), (ins setend_op:$end), MiscFrm, NoItinerary,
+ "setend\t$end", []>, Requires<[IsARM]> {
bits<1> end;
let Inst{31-10} = 0b1111000100000001000000;
let Inst{9} = end;
@@ -1351,14 +1780,17 @@ let neverHasSideEffects = 1, isReMaterializable = 1 in
// the instruction. The {24-21} opcode bits are set by the fixup, as we don't
// know until then which form of the instruction will be used.
def ADR : AI1<{0,?,?,0}, (outs GPR:$Rd), (ins adrlabel:$label),
- MiscFrm, IIC_iALUi, "adr", "\t$Rd, #$label", []> {
+ MiscFrm, IIC_iALUi, "adr", "\t$Rd, $label", []> {
bits<4> Rd;
- bits<12> label;
+ bits<14> label;
let Inst{27-25} = 0b001;
+ let Inst{24} = 0;
+ let Inst{23-22} = label{13-12};
+ let Inst{21} = 0;
let Inst{20} = 0;
let Inst{19-16} = 0b1111;
let Inst{15-12} = Rd;
- let Inst{11-0} = label;
+ let Inst{11-0} = label{11-0};
}
def LEApcrel : ARMPseudoInst<(outs GPR:$Rd), (ins i32imm:$label, pred:$p),
4, IIC_iALUi, []>;
@@ -1424,6 +1856,7 @@ let isCall = 1,
let Inst{31-28} = 0b1110;
bits<24> func;
let Inst{23-0} = func;
+ let DecoderMethod = "DecodeBranchImmInstruction";
}
def BL_pred : ABI<0b1011, (outs), (ins bl_target:$func, variable_ops),
@@ -1432,6 +1865,7 @@ let isCall = 1,
Requires<[IsARM, IsNotDarwin]> {
bits<24> func;
let Inst{23-0} = func;
+ let DecoderMethod = "DecodeBranchImmInstruction";
}
// ARMv5T and above
@@ -1516,6 +1950,7 @@ let isBranch = 1, isTerminator = 1 in {
[/*(ARMbrcond bb:$target, imm:$cc, CCR:$ccr)*/]> {
bits<24> target;
let Inst{23-0} = target;
+ let DecoderMethod = "DecodeBranchImmInstruction";
}
let isBarrier = 1 in {
@@ -1549,9 +1984,9 @@ let isBranch = 1, isTerminator = 1 in {
}
-// BLX (immediate) -- for disassembly only
-def BLXi : AXI<(outs), (ins br_target:$target), BrMiscFrm, NoItinerary,
- "blx\t$target", [/* pattern left blank */]>,
+// BLX (immediate)
+def BLXi : AXI<(outs), (ins blx_target:$target), BrMiscFrm, NoItinerary,
+ "blx\t$target", []>,
Requires<[IsARM, HasV5T]> {
let Inst{31-25} = 0b1111101;
bits<25> target;
@@ -1614,64 +2049,100 @@ let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1 in {
}
}
-
-
-
-
-// Secure Monitor Call is a system instruction -- for disassembly only
-def SMC : ABI<0b0001, (outs), (ins i32imm:$opt), NoItinerary, "smc", "\t$opt",
- [/* For disassembly only; pattern left blank */]> {
+// Secure Monitor Call is a system instruction.
+def SMC : ABI<0b0001, (outs), (ins imm0_15:$opt), NoItinerary, "smc", "\t$opt",
+ []> {
bits<4> opt;
let Inst{23-4} = 0b01100000000000000111;
let Inst{3-0} = opt;
}
-// Supervisor Call (Software Interrupt) -- for disassembly only
+// Supervisor Call (Software Interrupt)
let isCall = 1, Uses = [SP] in {
-def SVC : ABI<0b1111, (outs), (ins i32imm:$svc), IIC_Br, "svc", "\t$svc",
- [/* For disassembly only; pattern left blank */]> {
+def SVC : ABI<0b1111, (outs), (ins imm24b:$svc), IIC_Br, "svc", "\t$svc", []> {
bits<24> svc;
let Inst{23-0} = svc;
}
}
-// Store Return State is a system instruction -- for disassembly only
-let isCodeGenOnly = 1 in { // FIXME: This should not use submode!
-def SRSW : ABXI<{1,0,0,?}, (outs), (ins ldstm_mode:$amode, i32imm:$mode),
- NoItinerary, "srs${amode}\tsp!, $mode",
- [/* For disassembly only; pattern left blank */]> {
+// Store Return State
+class SRSI<bit wb, string asm>
+ : XI<(outs), (ins imm0_31:$mode), AddrModeNone, 4, IndexModeNone, BrFrm,
+ NoItinerary, asm, "", []> {
+ bits<5> mode;
let Inst{31-28} = 0b1111;
- let Inst{22-20} = 0b110; // W = 1
- let Inst{19-8} = 0xd05;
- let Inst{7-5} = 0b000;
+ let Inst{27-25} = 0b100;
+ let Inst{22} = 1;
+ let Inst{21} = wb;
+ let Inst{20} = 0;
+ let Inst{19-16} = 0b1101; // SP
+ let Inst{15-5} = 0b00000101000;
+ let Inst{4-0} = mode;
}
-def SRS : ABXI<{1,0,0,?}, (outs), (ins ldstm_mode:$amode, i32imm:$mode),
- NoItinerary, "srs${amode}\tsp, $mode",
- [/* For disassembly only; pattern left blank */]> {
- let Inst{31-28} = 0b1111;
- let Inst{22-20} = 0b100; // W = 0
- let Inst{19-8} = 0xd05;
- let Inst{7-5} = 0b000;
+def SRSDA : SRSI<0, "srsda\tsp, $mode"> {
+ let Inst{24-23} = 0;
+}
+def SRSDA_UPD : SRSI<1, "srsda\tsp!, $mode"> {
+ let Inst{24-23} = 0;
+}
+def SRSDB : SRSI<0, "srsdb\tsp, $mode"> {
+ let Inst{24-23} = 0b10;
+}
+def SRSDB_UPD : SRSI<1, "srsdb\tsp!, $mode"> {
+ let Inst{24-23} = 0b10;
+}
+def SRSIA : SRSI<0, "srsia\tsp, $mode"> {
+ let Inst{24-23} = 0b01;
+}
+def SRSIA_UPD : SRSI<1, "srsia\tsp!, $mode"> {
+ let Inst{24-23} = 0b01;
+}
+def SRSIB : SRSI<0, "srsib\tsp, $mode"> {
+ let Inst{24-23} = 0b11;
+}
+def SRSIB_UPD : SRSI<1, "srsib\tsp!, $mode"> {
+ let Inst{24-23} = 0b11;
}
-// Return From Exception is a system instruction -- for disassembly only
-def RFEW : ABXI<{1,0,0,?}, (outs), (ins ldstm_mode:$amode, GPR:$base),
- NoItinerary, "rfe${amode}\t$base!",
- [/* For disassembly only; pattern left blank */]> {
+// Return From Exception
+class RFEI<bit wb, string asm>
+ : XI<(outs), (ins GPR:$Rn), AddrModeNone, 4, IndexModeNone, BrFrm,
+ NoItinerary, asm, "", []> {
+ bits<4> Rn;
let Inst{31-28} = 0b1111;
- let Inst{22-20} = 0b011; // W = 1
- let Inst{15-0} = 0x0a00;
+ let Inst{27-25} = 0b100;
+ let Inst{22} = 0;
+ let Inst{21} = wb;
+ let Inst{20} = 1;
+ let Inst{19-16} = Rn;
+ let Inst{15-0} = 0xa00;
}
-def RFE : ABXI<{1,0,0,?}, (outs), (ins ldstm_mode:$amode, GPR:$base),
- NoItinerary, "rfe${amode}\t$base",
- [/* For disassembly only; pattern left blank */]> {
- let Inst{31-28} = 0b1111;
- let Inst{22-20} = 0b001; // W = 0
- let Inst{15-0} = 0x0a00;
+def RFEDA : RFEI<0, "rfeda\t$Rn"> {
+ let Inst{24-23} = 0;
+}
+def RFEDA_UPD : RFEI<1, "rfeda\t$Rn!"> {
+ let Inst{24-23} = 0;
+}
+def RFEDB : RFEI<0, "rfedb\t$Rn"> {
+ let Inst{24-23} = 0b10;
+}
+def RFEDB_UPD : RFEI<1, "rfedb\t$Rn!"> {
+ let Inst{24-23} = 0b10;
+}
+def RFEIA : RFEI<0, "rfeia\t$Rn"> {
+ let Inst{24-23} = 0b01;
+}
+def RFEIA_UPD : RFEI<1, "rfeia\t$Rn!"> {
+ let Inst{24-23} = 0b01;
+}
+def RFEIB : RFEI<0, "rfeib\t$Rn"> {
+ let Inst{24-23} = 0b11;
+}
+def RFEIB_UPD : RFEI<1, "rfeib\t$Rn!"> {
+ let Inst{24-23} = 0b11;
}
-} // isCodeGenOnly = 1
//===----------------------------------------------------------------------===//
// Load / store Instructions.
@@ -1682,16 +2153,16 @@ def RFE : ABXI<{1,0,0,?}, (outs), (ins ldstm_mode:$amode, GPR:$base),
defm LDR : AI_ldr1<0, "ldr", IIC_iLoad_r, IIC_iLoad_si,
UnOpFrag<(load node:$Src)>>;
-defm LDRB : AI_ldr1<1, "ldrb", IIC_iLoad_bh_r, IIC_iLoad_bh_si,
+defm LDRB : AI_ldr1nopc<1, "ldrb", IIC_iLoad_bh_r, IIC_iLoad_bh_si,
UnOpFrag<(zextloadi8 node:$Src)>>;
defm STR : AI_str1<0, "str", IIC_iStore_r, IIC_iStore_si,
BinOpFrag<(store node:$LHS, node:$RHS)>>;
-defm STRB : AI_str1<1, "strb", IIC_iStore_bh_r, IIC_iStore_bh_si,
+defm STRB : AI_str1nopc<1, "strb", IIC_iStore_bh_r, IIC_iStore_bh_si,
BinOpFrag<(truncstorei8 node:$LHS, node:$RHS)>>;
// Special LDR for loads from non-pc-relative constpools.
let canFoldAsLoad = 1, mayLoad = 1, neverHasSideEffects = 1,
- isReMaterializable = 1 in
+ isReMaterializable = 1, isCodeGenOnly = 1 in
def LDRcp : AI2ldst<0b010, 1, 0, (outs GPR:$Rt), (ins addrmode_imm12:$addr),
AddrMode_i12, LdFrm, IIC_iLoad_r, "ldr", "\t$Rt, $addr",
[]> {
@@ -1727,34 +2198,65 @@ def LDRD : AI3ld<0b1101, 0, (outs GPR:$Rd, GPR:$dst2),
// Indexed loads
multiclass AI2_ldridx<bit isByte, string opc, InstrItinClass itin> {
- def _PRE : AI2ldstidx<1, isByte, 1, (outs GPR:$Rt, GPR:$Rn_wb),
- (ins addrmode2:$addr), IndexModePre, LdFrm, itin,
+ def _PRE_IMM : AI2ldstidx<1, isByte, 1, (outs GPR:$Rt, GPR:$Rn_wb),
+ (ins addrmode_imm12:$addr), IndexModePre, LdFrm, itin,
opc, "\t$Rt, $addr!", "$addr.base = $Rn_wb", []> {
- // {17-14} Rn
- // {13} 1 == Rm, 0 == imm12
- // {12} isAdd
- // {11-0} imm12/Rm
- bits<18> addr;
- let Inst{25} = addr{13};
+ bits<17> addr;
+ let Inst{25} = 0;
+ let Inst{23} = addr{12};
+ let Inst{19-16} = addr{16-13};
+ let Inst{11-0} = addr{11-0};
+ let DecoderMethod = "DecodeLDRPreImm";
+ let AsmMatchConverter = "cvtLdWriteBackRegAddrModeImm12";
+ }
+
+ def _PRE_REG : AI2ldstidx<1, isByte, 1, (outs GPR:$Rt, GPR:$Rn_wb),
+ (ins ldst_so_reg:$addr), IndexModePre, LdFrm, itin,
+ opc, "\t$Rt, $addr!", "$addr.base = $Rn_wb", []> {
+ bits<17> addr;
+ let Inst{25} = 1;
let Inst{23} = addr{12};
- let Inst{19-16} = addr{17-14};
+ let Inst{19-16} = addr{16-13};
let Inst{11-0} = addr{11-0};
- let AsmMatchConverter = "CvtLdWriteBackRegAddrMode2";
+ let Inst{4} = 0;
+ let DecoderMethod = "DecodeLDRPreReg";
+ let AsmMatchConverter = "cvtLdWriteBackRegAddrMode2";
}
- def _POST : AI2ldstidx<1, isByte, 0, (outs GPR:$Rt, GPR:$Rn_wb),
- (ins GPR:$Rn, am2offset:$offset),
+
+ def _POST_REG : AI2ldstidx<1, isByte, 0, (outs GPR:$Rt, GPR:$Rn_wb),
+ (ins addr_offset_none:$addr, am2offset_reg:$offset),
+ IndexModePost, LdFrm, itin,
+ opc, "\t$Rt, $addr, $offset",
+ "$addr.base = $Rn_wb", []> {
+ // {12} isAdd
+ // {11-0} imm12/Rm
+ bits<14> offset;
+ bits<4> addr;
+ let Inst{25} = 1;
+ let Inst{23} = offset{12};
+ let Inst{19-16} = addr;
+ let Inst{11-0} = offset{11-0};
+
+ let DecoderMethod = "DecodeAddrMode2IdxInstruction";
+ }
+
+ def _POST_IMM : AI2ldstidx<1, isByte, 0, (outs GPR:$Rt, GPR:$Rn_wb),
+ (ins addr_offset_none:$addr, am2offset_imm:$offset),
IndexModePost, LdFrm, itin,
- opc, "\t$Rt, [$Rn], $offset", "$Rn = $Rn_wb", []> {
- // {13} 1 == Rm, 0 == imm12
+ opc, "\t$Rt, $addr, $offset",
+ "$addr.base = $Rn_wb", []> {
// {12} isAdd
// {11-0} imm12/Rm
bits<14> offset;
- bits<4> Rn;
- let Inst{25} = offset{13};
+ bits<4> addr;
+ let Inst{25} = 0;
let Inst{23} = offset{12};
- let Inst{19-16} = Rn;
+ let Inst{19-16} = addr;
let Inst{11-0} = offset{11-0};
+
+ let DecoderMethod = "DecodeAddrMode2IdxInstruction";
}
+
}
let mayLoad = 1, neverHasSideEffects = 1 in {
@@ -1762,8 +2264,8 @@ defm LDR : AI2_ldridx<0, "ldr", IIC_iLoad_ru>;
defm LDRB : AI2_ldridx<1, "ldrb", IIC_iLoad_bh_ru>;
}
-multiclass AI3_ldridx<bits<4> op, bit op20, string opc, InstrItinClass itin> {
- def _PRE : AI3ldstidx<op, op20, 1, 1, (outs GPR:$Rt, GPR:$Rn_wb),
+multiclass AI3_ldridx<bits<4> op, string opc, InstrItinClass itin> {
+ def _PRE : AI3ldstidx<op, 1, 1, (outs GPR:$Rt, GPR:$Rn_wb),
(ins addrmode3:$addr), IndexModePre,
LdMiscFrm, itin,
opc, "\t$Rt, $addr!", "$addr.base = $Rn_wb", []> {
@@ -1773,27 +2275,31 @@ multiclass AI3_ldridx<bits<4> op, bit op20, string opc, InstrItinClass itin> {
let Inst{19-16} = addr{12-9}; // Rn
let Inst{11-8} = addr{7-4}; // imm7_4/zero
let Inst{3-0} = addr{3-0}; // imm3_0/Rm
+ let AsmMatchConverter = "cvtLdWriteBackRegAddrMode3";
+ let DecoderMethod = "DecodeAddrMode3Instruction";
}
- def _POST : AI3ldstidx<op, op20, 1, 0, (outs GPR:$Rt, GPR:$Rn_wb),
- (ins GPR:$Rn, am3offset:$offset), IndexModePost,
- LdMiscFrm, itin,
- opc, "\t$Rt, [$Rn], $offset", "$Rn = $Rn_wb", []> {
+ def _POST : AI3ldstidx<op, 1, 0, (outs GPR:$Rt, GPR:$Rn_wb),
+ (ins addr_offset_none:$addr, am3offset:$offset),
+ IndexModePost, LdMiscFrm, itin,
+ opc, "\t$Rt, $addr, $offset", "$addr.base = $Rn_wb",
+ []> {
bits<10> offset;
- bits<4> Rn;
+ bits<4> addr;
let Inst{23} = offset{8}; // U bit
let Inst{22} = offset{9}; // 1 == imm8, 0 == Rm
- let Inst{19-16} = Rn;
+ let Inst{19-16} = addr;
let Inst{11-8} = offset{7-4}; // imm7_4/zero
let Inst{3-0} = offset{3-0}; // imm3_0/Rm
+ let DecoderMethod = "DecodeAddrMode3Instruction";
}
}
let mayLoad = 1, neverHasSideEffects = 1 in {
-defm LDRH : AI3_ldridx<0b1011, 1, "ldrh", IIC_iLoad_bh_ru>;
-defm LDRSH : AI3_ldridx<0b1111, 1, "ldrsh", IIC_iLoad_bh_ru>;
-defm LDRSB : AI3_ldridx<0b1101, 1, "ldrsb", IIC_iLoad_bh_ru>;
+defm LDRH : AI3_ldridx<0b1011, "ldrh", IIC_iLoad_bh_ru>;
+defm LDRSH : AI3_ldridx<0b1111, "ldrsh", IIC_iLoad_bh_ru>;
+defm LDRSB : AI3_ldridx<0b1101, "ldrsb", IIC_iLoad_bh_ru>;
let hasExtraDefRegAllocReq = 1 in {
-def LDRD_PRE : AI3ldstidx<0b1101, 0, 1, 1, (outs GPR:$Rt, GPR:$Rt2, GPR:$Rn_wb),
+def LDRD_PRE : AI3ldstidx<0b1101, 0, 1, (outs GPR:$Rt, GPR:$Rt2, GPR:$Rn_wb),
(ins addrmode3:$addr), IndexModePre,
LdMiscFrm, IIC_iLoad_d_ru,
"ldrd", "\t$Rt, $Rt2, $addr!",
@@ -1804,70 +2310,128 @@ def LDRD_PRE : AI3ldstidx<0b1101, 0, 1, 1, (outs GPR:$Rt, GPR:$Rt2, GPR:$Rn_wb),
let Inst{19-16} = addr{12-9}; // Rn
let Inst{11-8} = addr{7-4}; // imm7_4/zero
let Inst{3-0} = addr{3-0}; // imm3_0/Rm
+ let DecoderMethod = "DecodeAddrMode3Instruction";
+ let AsmMatchConverter = "cvtLdrdPre";
}
-def LDRD_POST: AI3ldstidx<0b1101, 0, 1, 0, (outs GPR:$Rt, GPR:$Rt2, GPR:$Rn_wb),
- (ins GPR:$Rn, am3offset:$offset), IndexModePost,
- LdMiscFrm, IIC_iLoad_d_ru,
- "ldrd", "\t$Rt, $Rt2, [$Rn], $offset",
- "$Rn = $Rn_wb", []> {
+def LDRD_POST: AI3ldstidx<0b1101, 0, 0, (outs GPR:$Rt, GPR:$Rt2, GPR:$Rn_wb),
+ (ins addr_offset_none:$addr, am3offset:$offset),
+ IndexModePost, LdMiscFrm, IIC_iLoad_d_ru,
+ "ldrd", "\t$Rt, $Rt2, $addr, $offset",
+ "$addr.base = $Rn_wb", []> {
bits<10> offset;
- bits<4> Rn;
+ bits<4> addr;
let Inst{23} = offset{8}; // U bit
let Inst{22} = offset{9}; // 1 == imm8, 0 == Rm
- let Inst{19-16} = Rn;
+ let Inst{19-16} = addr;
let Inst{11-8} = offset{7-4}; // imm7_4/zero
let Inst{3-0} = offset{3-0}; // imm3_0/Rm
+ let DecoderMethod = "DecodeAddrMode3Instruction";
}
} // hasExtraDefRegAllocReq = 1
} // mayLoad = 1, neverHasSideEffects = 1
-// LDRT, LDRBT, LDRSBT, LDRHT, LDRSHT are for disassembly only.
+// LDRT, LDRBT, LDRSBT, LDRHT, LDRSHT.
let mayLoad = 1, neverHasSideEffects = 1 in {
-def LDRT : AI2ldstidx<1, 0, 0, (outs GPR:$Rt, GPR:$base_wb),
- (ins addrmode2:$addr), IndexModePost, LdFrm, IIC_iLoad_ru,
- "ldrt", "\t$Rt, $addr", "$addr.base = $base_wb", []> {
- // {17-14} Rn
- // {13} 1 == Rm, 0 == imm12
+def LDRT_POST_REG : AI2ldstidx<1, 0, 0, (outs GPR:$Rt, GPR:$Rn_wb),
+ (ins addr_offset_none:$addr, am2offset_reg:$offset),
+ IndexModePost, LdFrm, IIC_iLoad_ru,
+ "ldrt", "\t$Rt, $addr, $offset",
+ "$addr.base = $Rn_wb", []> {
// {12} isAdd
// {11-0} imm12/Rm
- bits<18> addr;
- let Inst{25} = addr{13};
- let Inst{23} = addr{12};
+ bits<14> offset;
+ bits<4> addr;
+ let Inst{25} = 1;
+ let Inst{23} = offset{12};
let Inst{21} = 1; // overwrite
- let Inst{19-16} = addr{17-14};
- let Inst{11-0} = addr{11-0};
- let AsmMatchConverter = "CvtLdWriteBackRegAddrMode2";
-}
-def LDRBT : AI2ldstidx<1, 1, 0, (outs GPR:$Rt, GPR:$base_wb),
- (ins addrmode2:$addr), IndexModePost, LdFrm, IIC_iLoad_bh_ru,
- "ldrbt", "\t$Rt, $addr", "$addr.base = $base_wb", []> {
- // {17-14} Rn
- // {13} 1 == Rm, 0 == imm12
+ let Inst{19-16} = addr;
+ let Inst{11-5} = offset{11-5};
+ let Inst{4} = 0;
+ let Inst{3-0} = offset{3-0};
+ let DecoderMethod = "DecodeAddrMode2IdxInstruction";
+}
+
+def LDRT_POST_IMM : AI2ldstidx<1, 0, 0, (outs GPR:$Rt, GPR:$Rn_wb),
+ (ins addr_offset_none:$addr, am2offset_imm:$offset),
+ IndexModePost, LdFrm, IIC_iLoad_ru,
+ "ldrt", "\t$Rt, $addr, $offset",
+ "$addr.base = $Rn_wb", []> {
// {12} isAdd
// {11-0} imm12/Rm
- bits<18> addr;
- let Inst{25} = addr{13};
- let Inst{23} = addr{12};
- let Inst{21} = 1; // overwrite
- let Inst{19-16} = addr{17-14};
- let Inst{11-0} = addr{11-0};
- let AsmMatchConverter = "CvtLdWriteBackRegAddrMode2";
-}
-def LDRSBT : AI3ldstidxT<0b1101, 1, 1, 0, (outs GPR:$Rt, GPR:$base_wb),
- (ins addrmode3:$addr), IndexModePost, LdMiscFrm, IIC_iLoad_bh_ru,
- "ldrsbt", "\t$Rt, $addr", "$addr.base = $base_wb", []> {
+ bits<14> offset;
+ bits<4> addr;
+ let Inst{25} = 0;
+ let Inst{23} = offset{12};
let Inst{21} = 1; // overwrite
+ let Inst{19-16} = addr;
+ let Inst{11-0} = offset{11-0};
+ let DecoderMethod = "DecodeAddrMode2IdxInstruction";
}
-def LDRHT : AI3ldstidxT<0b1011, 1, 1, 0, (outs GPR:$Rt, GPR:$base_wb),
- (ins addrmode3:$addr), IndexModePost, LdMiscFrm, IIC_iLoad_bh_ru,
- "ldrht", "\t$Rt, $addr", "$addr.base = $base_wb", []> {
+
+def LDRBT_POST_REG : AI2ldstidx<1, 1, 0, (outs GPR:$Rt, GPR:$Rn_wb),
+ (ins addr_offset_none:$addr, am2offset_reg:$offset),
+ IndexModePost, LdFrm, IIC_iLoad_bh_ru,
+ "ldrbt", "\t$Rt, $addr, $offset",
+ "$addr.base = $Rn_wb", []> {
+ // {12} isAdd
+ // {11-0} imm12/Rm
+ bits<14> offset;
+ bits<4> addr;
+ let Inst{25} = 1;
+ let Inst{23} = offset{12};
let Inst{21} = 1; // overwrite
-}
-def LDRSHT : AI3ldstidxT<0b1111, 1, 1, 0, (outs GPR:$Rt, GPR:$base_wb),
- (ins addrmode3:$addr), IndexModePost, LdMiscFrm, IIC_iLoad_bh_ru,
- "ldrsht", "\t$Rt, $addr", "$addr.base = $base_wb", []> {
+ let Inst{19-16} = addr;
+ let Inst{11-5} = offset{11-5};
+ let Inst{4} = 0;
+ let Inst{3-0} = offset{3-0};
+ let DecoderMethod = "DecodeAddrMode2IdxInstruction";
+}
+
+def LDRBT_POST_IMM : AI2ldstidx<1, 1, 0, (outs GPR:$Rt, GPR:$Rn_wb),
+ (ins addr_offset_none:$addr, am2offset_imm:$offset),
+ IndexModePost, LdFrm, IIC_iLoad_bh_ru,
+ "ldrbt", "\t$Rt, $addr, $offset",
+ "$addr.base = $Rn_wb", []> {
+ // {12} isAdd
+ // {11-0} imm12/Rm
+ bits<14> offset;
+ bits<4> addr;
+ let Inst{25} = 0;
+ let Inst{23} = offset{12};
let Inst{21} = 1; // overwrite
+ let Inst{19-16} = addr;
+ let Inst{11-0} = offset{11-0};
+ let DecoderMethod = "DecodeAddrMode2IdxInstruction";
+}
+
+multiclass AI3ldrT<bits<4> op, string opc> {
+ def i : AI3ldstidxT<op, 1, (outs GPR:$Rt, GPR:$base_wb),
+ (ins addr_offset_none:$addr, postidx_imm8:$offset),
+ IndexModePost, LdMiscFrm, IIC_iLoad_bh_ru, opc,
+ "\t$Rt, $addr, $offset", "$addr.base = $base_wb", []> {
+ bits<9> offset;
+ let Inst{23} = offset{8};
+ let Inst{22} = 1;
+ let Inst{11-8} = offset{7-4};
+ let Inst{3-0} = offset{3-0};
+ let AsmMatchConverter = "cvtLdExtTWriteBackImm";
+ }
+ def r : AI3ldstidxT<op, 1, (outs GPR:$Rt, GPR:$base_wb),
+ (ins addr_offset_none:$addr, postidx_reg:$Rm),
+ IndexModePost, LdMiscFrm, IIC_iLoad_bh_ru, opc,
+ "\t$Rt, $addr, $Rm", "$addr.base = $base_wb", []> {
+ bits<5> Rm;
+ let Inst{23} = Rm{4};
+ let Inst{22} = 0;
+ let Inst{11-8} = 0;
+ let Inst{3-0} = Rm{3-0};
+ let AsmMatchConverter = "cvtLdExtTWriteBackReg";
+ }
}
+
+defm LDRSBT : AI3ldrT<0b1101, "ldrsbt">;
+defm LDRHT : AI3ldrT<0b1011, "ldrht">;
+defm LDRSHT : AI3ldrT<0b1111, "ldrsht">;
}
// Store
@@ -1881,98 +2445,302 @@ def STRH : AI3str<0b1011, (outs), (ins GPR:$Rt, addrmode3:$addr), StMiscFrm,
let mayStore = 1, neverHasSideEffects = 1, hasExtraSrcRegAllocReq = 1 in
def STRD : AI3str<0b1111, (outs), (ins GPR:$Rt, GPR:$src2, addrmode3:$addr),
StMiscFrm, IIC_iStore_d_r,
- "strd", "\t$Rt, $src2, $addr", []>, Requires<[IsARM, HasV5TE]>;
+ "strd", "\t$Rt, $src2, $addr", []>,
+ Requires<[IsARM, HasV5TE]> {
+ let Inst{21} = 0;
+}
// Indexed stores
-def STR_PRE : AI2stridx<0, 1, (outs GPR:$Rn_wb),
- (ins GPR:$Rt, GPR:$Rn, am2offset:$offset),
- IndexModePre, StFrm, IIC_iStore_ru,
- "str", "\t$Rt, [$Rn, $offset]!",
- "$Rn = $Rn_wb,@earlyclobber $Rn_wb",
- [(set GPR:$Rn_wb,
- (pre_store GPR:$Rt, GPR:$Rn, am2offset:$offset))]>;
-
-def STR_POST : AI2stridx<0, 0, (outs GPR:$Rn_wb),
- (ins GPR:$Rt, GPR:$Rn, am2offset:$offset),
- IndexModePost, StFrm, IIC_iStore_ru,
- "str", "\t$Rt, [$Rn], $offset",
- "$Rn = $Rn_wb,@earlyclobber $Rn_wb",
- [(set GPR:$Rn_wb,
- (post_store GPR:$Rt, GPR:$Rn, am2offset:$offset))]>;
-
-def STRB_PRE : AI2stridx<1, 1, (outs GPR:$Rn_wb),
- (ins GPR:$Rt, GPR:$Rn, am2offset:$offset),
- IndexModePre, StFrm, IIC_iStore_bh_ru,
- "strb", "\t$Rt, [$Rn, $offset]!",
- "$Rn = $Rn_wb,@earlyclobber $Rn_wb",
- [(set GPR:$Rn_wb, (pre_truncsti8 GPR:$Rt,
- GPR:$Rn, am2offset:$offset))]>;
-def STRB_POST: AI2stridx<1, 0, (outs GPR:$Rn_wb),
- (ins GPR:$Rt, GPR:$Rn, am2offset:$offset),
- IndexModePost, StFrm, IIC_iStore_bh_ru,
- "strb", "\t$Rt, [$Rn], $offset",
- "$Rn = $Rn_wb,@earlyclobber $Rn_wb",
- [(set GPR:$Rn_wb, (post_truncsti8 GPR:$Rt,
- GPR:$Rn, am2offset:$offset))]>;
-
-def STRH_PRE : AI3stridx<0b1011, 0, 1, (outs GPR:$Rn_wb),
- (ins GPR:$Rt, GPR:$Rn, am3offset:$offset),
- IndexModePre, StMiscFrm, IIC_iStore_ru,
- "strh", "\t$Rt, [$Rn, $offset]!",
- "$Rn = $Rn_wb,@earlyclobber $Rn_wb",
- [(set GPR:$Rn_wb,
- (pre_truncsti16 GPR:$Rt, GPR:$Rn, am3offset:$offset))]>;
-
-def STRH_POST: AI3stridx<0b1011, 0, 0, (outs GPR:$Rn_wb),
- (ins GPR:$Rt, GPR:$Rn, am3offset:$offset),
- IndexModePost, StMiscFrm, IIC_iStore_bh_ru,
- "strh", "\t$Rt, [$Rn], $offset",
- "$Rn = $Rn_wb,@earlyclobber $Rn_wb",
- [(set GPR:$Rn_wb, (post_truncsti16 GPR:$Rt,
- GPR:$Rn, am3offset:$offset))]>;
-
-// For disassembly only
+multiclass AI2_stridx<bit isByte, string opc, InstrItinClass itin> {
+ def _PRE_IMM : AI2ldstidx<0, isByte, 1, (outs GPR:$Rn_wb),
+ (ins GPR:$Rt, addrmode_imm12:$addr), IndexModePre,
+ StFrm, itin,
+ opc, "\t$Rt, $addr!", "$addr.base = $Rn_wb", []> {
+ bits<17> addr;
+ let Inst{25} = 0;
+ let Inst{23} = addr{12}; // U (add = ('U' == 1))
+ let Inst{19-16} = addr{16-13}; // Rn
+ let Inst{11-0} = addr{11-0}; // imm12
+ let AsmMatchConverter = "cvtStWriteBackRegAddrModeImm12";
+ let DecoderMethod = "DecodeSTRPreImm";
+ }
+
+ def _PRE_REG : AI2ldstidx<0, isByte, 1, (outs GPR:$Rn_wb),
+ (ins GPR:$Rt, ldst_so_reg:$addr),
+ IndexModePre, StFrm, itin,
+ opc, "\t$Rt, $addr!", "$addr.base = $Rn_wb", []> {
+ bits<17> addr;
+ let Inst{25} = 1;
+ let Inst{23} = addr{12}; // U (add = ('U' == 1))
+ let Inst{19-16} = addr{16-13}; // Rn
+ let Inst{11-0} = addr{11-0};
+ let Inst{4} = 0; // Inst{4} = 0
+ let AsmMatchConverter = "cvtStWriteBackRegAddrMode2";
+ let DecoderMethod = "DecodeSTRPreReg";
+ }
+ def _POST_REG : AI2ldstidx<0, isByte, 0, (outs GPR:$Rn_wb),
+ (ins GPR:$Rt, addr_offset_none:$addr, am2offset_reg:$offset),
+ IndexModePost, StFrm, itin,
+ opc, "\t$Rt, $addr, $offset",
+ "$addr.base = $Rn_wb", []> {
+ // {12} isAdd
+ // {11-0} imm12/Rm
+ bits<14> offset;
+ bits<4> addr;
+ let Inst{25} = 1;
+ let Inst{23} = offset{12};
+ let Inst{19-16} = addr;
+ let Inst{11-0} = offset{11-0};
+
+ let DecoderMethod = "DecodeAddrMode2IdxInstruction";
+ }
+
+ def _POST_IMM : AI2ldstidx<0, isByte, 0, (outs GPR:$Rn_wb),
+ (ins GPR:$Rt, addr_offset_none:$addr, am2offset_imm:$offset),
+ IndexModePost, StFrm, itin,
+ opc, "\t$Rt, $addr, $offset",
+ "$addr.base = $Rn_wb", []> {
+ // {12} isAdd
+ // {11-0} imm12/Rm
+ bits<14> offset;
+ bits<4> addr;
+ let Inst{25} = 0;
+ let Inst{23} = offset{12};
+ let Inst{19-16} = addr;
+ let Inst{11-0} = offset{11-0};
+
+ let DecoderMethod = "DecodeAddrMode2IdxInstruction";
+ }
+}
+
+let mayStore = 1, neverHasSideEffects = 1 in {
+defm STR : AI2_stridx<0, "str", IIC_iStore_ru>;
+defm STRB : AI2_stridx<1, "strb", IIC_iStore_bh_ru>;
+}
+
+def : ARMPat<(post_store GPR:$Rt, addr_offset_none:$addr,
+ am2offset_reg:$offset),
+ (STR_POST_REG GPR:$Rt, addr_offset_none:$addr,
+ am2offset_reg:$offset)>;
+def : ARMPat<(post_store GPR:$Rt, addr_offset_none:$addr,
+ am2offset_imm:$offset),
+ (STR_POST_IMM GPR:$Rt, addr_offset_none:$addr,
+ am2offset_imm:$offset)>;
+def : ARMPat<(post_truncsti8 GPR:$Rt, addr_offset_none:$addr,
+ am2offset_reg:$offset),
+ (STRB_POST_REG GPR:$Rt, addr_offset_none:$addr,
+ am2offset_reg:$offset)>;
+def : ARMPat<(post_truncsti8 GPR:$Rt, addr_offset_none:$addr,
+ am2offset_imm:$offset),
+ (STRB_POST_IMM GPR:$Rt, addr_offset_none:$addr,
+ am2offset_imm:$offset)>;
+
+// Pseudo-instructions for pattern matching the pre-indexed stores. We can't
+// put the patterns on the instruction definitions directly as ISel wants
+// the address base and offset to be separate operands, not a single
+// complex operand like we represent the instructions themselves. The
+// pseudos map between the two.
+let usesCustomInserter = 1,
+ Constraints = "$Rn = $Rn_wb,@earlyclobber $Rn_wb" in {
+def STRi_preidx: ARMPseudoInst<(outs GPR:$Rn_wb),
+ (ins GPR:$Rt, GPR:$Rn, am2offset_imm:$offset, pred:$p),
+ 4, IIC_iStore_ru,
+ [(set GPR:$Rn_wb,
+ (pre_store GPR:$Rt, GPR:$Rn, am2offset_imm:$offset))]>;
+def STRr_preidx: ARMPseudoInst<(outs GPR:$Rn_wb),
+ (ins GPR:$Rt, GPR:$Rn, am2offset_reg:$offset, pred:$p),
+ 4, IIC_iStore_ru,
+ [(set GPR:$Rn_wb,
+ (pre_store GPR:$Rt, GPR:$Rn, am2offset_reg:$offset))]>;
+def STRBi_preidx: ARMPseudoInst<(outs GPR:$Rn_wb),
+ (ins GPR:$Rt, GPR:$Rn, am2offset_imm:$offset, pred:$p),
+ 4, IIC_iStore_ru,
+ [(set GPR:$Rn_wb,
+ (pre_truncsti8 GPR:$Rt, GPR:$Rn, am2offset_imm:$offset))]>;
+def STRBr_preidx: ARMPseudoInst<(outs GPR:$Rn_wb),
+ (ins GPR:$Rt, GPR:$Rn, am2offset_reg:$offset, pred:$p),
+ 4, IIC_iStore_ru,
+ [(set GPR:$Rn_wb,
+ (pre_truncsti8 GPR:$Rt, GPR:$Rn, am2offset_reg:$offset))]>;
+def STRH_preidx: ARMPseudoInst<(outs GPR:$Rn_wb),
+ (ins GPR:$Rt, GPR:$Rn, am3offset:$offset, pred:$p),
+ 4, IIC_iStore_ru,
+ [(set GPR:$Rn_wb,
+ (pre_truncsti16 GPR:$Rt, GPR:$Rn, am3offset:$offset))]>;
+}
+
+
+
+def STRH_PRE : AI3ldstidx<0b1011, 0, 1, (outs GPR:$Rn_wb),
+ (ins GPR:$Rt, addrmode3:$addr), IndexModePre,
+ StMiscFrm, IIC_iStore_bh_ru,
+ "strh", "\t$Rt, $addr!", "$addr.base = $Rn_wb", []> {
+ bits<14> addr;
+ let Inst{23} = addr{8}; // U bit
+ let Inst{22} = addr{13}; // 1 == imm8, 0 == Rm
+ let Inst{19-16} = addr{12-9}; // Rn
+ let Inst{11-8} = addr{7-4}; // imm7_4/zero
+ let Inst{3-0} = addr{3-0}; // imm3_0/Rm
+ let AsmMatchConverter = "cvtStWriteBackRegAddrMode3";
+ let DecoderMethod = "DecodeAddrMode3Instruction";
+}
+
+def STRH_POST : AI3ldstidx<0b1011, 0, 0, (outs GPR:$Rn_wb),
+ (ins GPR:$Rt, addr_offset_none:$addr, am3offset:$offset),
+ IndexModePost, StMiscFrm, IIC_iStore_bh_ru,
+ "strh", "\t$Rt, $addr, $offset", "$addr.base = $Rn_wb",
+ [(set GPR:$Rn_wb, (post_truncsti16 GPR:$Rt,
+ addr_offset_none:$addr,
+ am3offset:$offset))]> {
+ bits<10> offset;
+ bits<4> addr;
+ let Inst{23} = offset{8}; // U bit
+ let Inst{22} = offset{9}; // 1 == imm8, 0 == Rm
+ let Inst{19-16} = addr;
+ let Inst{11-8} = offset{7-4}; // imm7_4/zero
+ let Inst{3-0} = offset{3-0}; // imm3_0/Rm
+ let DecoderMethod = "DecodeAddrMode3Instruction";
+}
+
let mayStore = 1, neverHasSideEffects = 1, hasExtraSrcRegAllocReq = 1 in {
-def STRD_PRE : AI3stdpr<(outs GPR:$base_wb),
- (ins GPR:$src1, GPR:$src2, GPR:$base, am3offset:$offset),
- StMiscFrm, IIC_iStore_d_ru,
- "strd", "\t$src1, $src2, [$base, $offset]!",
- "$base = $base_wb", []>;
-
-// For disassembly only
-def STRD_POST: AI3stdpo<(outs GPR:$base_wb),
- (ins GPR:$src1, GPR:$src2, GPR:$base, am3offset:$offset),
- StMiscFrm, IIC_iStore_d_ru,
- "strd", "\t$src1, $src2, [$base], $offset",
- "$base = $base_wb", []>;
+def STRD_PRE : AI3ldstidx<0b1111, 0, 1, (outs GPR:$Rn_wb),
+ (ins GPR:$Rt, GPR:$Rt2, addrmode3:$addr),
+ IndexModePre, StMiscFrm, IIC_iStore_d_ru,
+ "strd", "\t$Rt, $Rt2, $addr!",
+ "$addr.base = $Rn_wb", []> {
+ bits<14> addr;
+ let Inst{23} = addr{8}; // U bit
+ let Inst{22} = addr{13}; // 1 == imm8, 0 == Rm
+ let Inst{19-16} = addr{12-9}; // Rn
+ let Inst{11-8} = addr{7-4}; // imm7_4/zero
+ let Inst{3-0} = addr{3-0}; // imm3_0/Rm
+ let DecoderMethod = "DecodeAddrMode3Instruction";
+ let AsmMatchConverter = "cvtStrdPre";
+}
+
+def STRD_POST: AI3ldstidx<0b1111, 0, 0, (outs GPR:$Rn_wb),
+ (ins GPR:$Rt, GPR:$Rt2, addr_offset_none:$addr,
+ am3offset:$offset),
+ IndexModePost, StMiscFrm, IIC_iStore_d_ru,
+ "strd", "\t$Rt, $Rt2, $addr, $offset",
+ "$addr.base = $Rn_wb", []> {
+ bits<10> offset;
+ bits<4> addr;
+ let Inst{23} = offset{8}; // U bit
+ let Inst{22} = offset{9}; // 1 == imm8, 0 == Rm
+ let Inst{19-16} = addr;
+ let Inst{11-8} = offset{7-4}; // imm7_4/zero
+ let Inst{3-0} = offset{3-0}; // imm3_0/Rm
+ let DecoderMethod = "DecodeAddrMode3Instruction";
+}
} // mayStore = 1, neverHasSideEffects = 1, hasExtraSrcRegAllocReq = 1
-// STRT, STRBT, and STRHT are for disassembly only.
+// STRT, STRBT, and STRHT
-def STRT : AI2stridxT<0, 0, (outs GPR:$Rn_wb), (ins GPR:$Rt, addrmode2:$addr),
- IndexModePost, StFrm, IIC_iStore_ru,
- "strt", "\t$Rt, $addr", "$addr.base = $Rn_wb",
- [/* For disassembly only; pattern left blank */]> {
+def STRBT_POST_REG : AI2ldstidx<0, 1, 0, (outs GPR:$Rn_wb),
+ (ins GPR:$Rt, addr_offset_none:$addr, am2offset_reg:$offset),
+ IndexModePost, StFrm, IIC_iStore_bh_ru,
+ "strbt", "\t$Rt, $addr, $offset",
+ "$addr.base = $Rn_wb", []> {
+ // {12} isAdd
+ // {11-0} imm12/Rm
+ bits<14> offset;
+ bits<4> addr;
+ let Inst{25} = 1;
+ let Inst{23} = offset{12};
let Inst{21} = 1; // overwrite
- let AsmMatchConverter = "CvtStWriteBackRegAddrMode2";
+ let Inst{19-16} = addr;
+ let Inst{11-5} = offset{11-5};
+ let Inst{4} = 0;
+ let Inst{3-0} = offset{3-0};
+ let DecoderMethod = "DecodeAddrMode2IdxInstruction";
+}
+
+def STRBT_POST_IMM : AI2ldstidx<0, 1, 0, (outs GPR:$Rn_wb),
+ (ins GPR:$Rt, addr_offset_none:$addr, am2offset_imm:$offset),
+ IndexModePost, StFrm, IIC_iStore_bh_ru,
+ "strbt", "\t$Rt, $addr, $offset",
+ "$addr.base = $Rn_wb", []> {
+ // {12} isAdd
+ // {11-0} imm12/Rm
+ bits<14> offset;
+ bits<4> addr;
+ let Inst{25} = 0;
+ let Inst{23} = offset{12};
+ let Inst{21} = 1; // overwrite
+ let Inst{19-16} = addr;
+ let Inst{11-0} = offset{11-0};
+ let DecoderMethod = "DecodeAddrMode2IdxInstruction";
}
-def STRBT : AI2stridxT<1, 0, (outs GPR:$Rn_wb), (ins GPR:$Rt, addrmode2:$addr),
- IndexModePost, StFrm, IIC_iStore_bh_ru,
- "strbt", "\t$Rt, $addr", "$addr.base = $Rn_wb",
- [/* For disassembly only; pattern left blank */]> {
+let mayStore = 1, neverHasSideEffects = 1 in {
+def STRT_POST_REG : AI2ldstidx<0, 0, 0, (outs GPR:$Rn_wb),
+ (ins GPR:$Rt, addr_offset_none:$addr, am2offset_reg:$offset),
+ IndexModePost, StFrm, IIC_iStore_ru,
+ "strt", "\t$Rt, $addr, $offset",
+ "$addr.base = $Rn_wb", []> {
+ // {12} isAdd
+ // {11-0} imm12/Rm
+ bits<14> offset;
+ bits<4> addr;
+ let Inst{25} = 1;
+ let Inst{23} = offset{12};
+ let Inst{21} = 1; // overwrite
+ let Inst{19-16} = addr;
+ let Inst{11-5} = offset{11-5};
+ let Inst{4} = 0;
+ let Inst{3-0} = offset{3-0};
+ let DecoderMethod = "DecodeAddrMode2IdxInstruction";
+}
+
+def STRT_POST_IMM : AI2ldstidx<0, 0, 0, (outs GPR:$Rn_wb),
+ (ins GPR:$Rt, addr_offset_none:$addr, am2offset_imm:$offset),
+ IndexModePost, StFrm, IIC_iStore_ru,
+ "strt", "\t$Rt, $addr, $offset",
+ "$addr.base = $Rn_wb", []> {
+ // {12} isAdd
+ // {11-0} imm12/Rm
+ bits<14> offset;
+ bits<4> addr;
+ let Inst{25} = 0;
+ let Inst{23} = offset{12};
let Inst{21} = 1; // overwrite
- let AsmMatchConverter = "CvtStWriteBackRegAddrMode2";
+ let Inst{19-16} = addr;
+ let Inst{11-0} = offset{11-0};
+ let DecoderMethod = "DecodeAddrMode2IdxInstruction";
+}
}
-def STRHT: AI3sthpo<(outs GPR:$base_wb), (ins GPR:$Rt, addrmode3:$addr),
- StMiscFrm, IIC_iStore_bh_ru,
- "strht", "\t$Rt, $addr", "$addr.base = $base_wb",
- [/* For disassembly only; pattern left blank */]> {
- let Inst{21} = 1; // overwrite
- let AsmMatchConverter = "CvtStWriteBackRegAddrMode3";
+
+multiclass AI3strT<bits<4> op, string opc> {
+ def i : AI3ldstidxT<op, 0, (outs GPR:$base_wb),
+ (ins GPR:$Rt, addr_offset_none:$addr, postidx_imm8:$offset),
+ IndexModePost, StMiscFrm, IIC_iStore_bh_ru, opc,
+ "\t$Rt, $addr, $offset", "$addr.base = $base_wb", []> {
+ bits<9> offset;
+ let Inst{23} = offset{8};
+ let Inst{22} = 1;
+ let Inst{11-8} = offset{7-4};
+ let Inst{3-0} = offset{3-0};
+ let AsmMatchConverter = "cvtStExtTWriteBackImm";
+ }
+ def r : AI3ldstidxT<op, 0, (outs GPR:$base_wb),
+ (ins GPR:$Rt, addr_offset_none:$addr, postidx_reg:$Rm),
+ IndexModePost, StMiscFrm, IIC_iStore_bh_ru, opc,
+ "\t$Rt, $addr, $Rm", "$addr.base = $base_wb", []> {
+ bits<5> Rm;
+ let Inst{23} = Rm{4};
+ let Inst{22} = 0;
+ let Inst{11-8} = 0;
+ let Inst{3-0} = Rm{3-0};
+ let AsmMatchConverter = "cvtStExtTWriteBackReg";
+ }
}
+
+defm STRHT : AI3strT<0b1011, "strht">;
+
+
//===----------------------------------------------------------------------===//
// Load / store multiple Instructions.
//
@@ -1996,6 +2764,8 @@ multiclass arm_ldst_mult<string asm, bit L_bit, Format f,
let Inst{24-23} = 0b01; // Increment After
let Inst{21} = 1; // Writeback
let Inst{20} = L_bit;
+
+ let DecoderMethod = "DecodeMemMultipleWritebackInstruction";
}
def DA :
AXI4<(outs), (ins GPR:$Rn, pred:$p, reglist:$regs, variable_ops),
@@ -2012,6 +2782,8 @@ multiclass arm_ldst_mult<string asm, bit L_bit, Format f,
let Inst{24-23} = 0b00; // Decrement After
let Inst{21} = 1; // Writeback
let Inst{20} = L_bit;
+
+ let DecoderMethod = "DecodeMemMultipleWritebackInstruction";
}
def DB :
AXI4<(outs), (ins GPR:$Rn, pred:$p, reglist:$regs, variable_ops),
@@ -2028,6 +2800,8 @@ multiclass arm_ldst_mult<string asm, bit L_bit, Format f,
let Inst{24-23} = 0b10; // Decrement Before
let Inst{21} = 1; // Writeback
let Inst{20} = L_bit;
+
+ let DecoderMethod = "DecodeMemMultipleWritebackInstruction";
}
def IB :
AXI4<(outs), (ins GPR:$Rn, pred:$p, reglist:$regs, variable_ops),
@@ -2044,6 +2818,8 @@ multiclass arm_ldst_mult<string asm, bit L_bit, Format f,
let Inst{24-23} = 0b11; // Increment Before
let Inst{21} = 1; // Writeback
let Inst{20} = L_bit;
+
+ let DecoderMethod = "DecodeMemMultipleWritebackInstruction";
}
}
@@ -2084,6 +2860,9 @@ def MOVr : AsI1<0b1101, (outs GPR:$Rd), (ins GPR:$Rm), DPFrm, IIC_iMOVr,
let Inst{15-12} = Rd;
}
+def : ARMInstAlias<"movs${p} $Rd, $Rm",
+ (MOVr GPR:$Rd, GPR:$Rm, pred:$p, CPSR)>;
+
// A version for the smaller set of tail call registers.
let neverHasSideEffects = 1 in
def MOVr_TC : AsI1<0b1101, (outs tcGPR:$Rd), (ins tcGPR:$Rm), DPFrm,
@@ -2097,15 +2876,33 @@ def MOVr_TC : AsI1<0b1101, (outs tcGPR:$Rd), (ins tcGPR:$Rm), DPFrm,
let Inst{15-12} = Rd;
}
-def MOVs : AsI1<0b1101, (outs GPR:$Rd), (ins shift_so_reg:$src),
- DPSoRegFrm, IIC_iMOVsr,
- "mov", "\t$Rd, $src", [(set GPR:$Rd, shift_so_reg:$src)]>,
+def MOVsr : AsI1<0b1101, (outs GPRnopc:$Rd), (ins shift_so_reg_reg:$src),
+ DPSoRegRegFrm, IIC_iMOVsr,
+ "mov", "\t$Rd, $src",
+ [(set GPRnopc:$Rd, shift_so_reg_reg:$src)]>, UnaryDP {
+ bits<4> Rd;
+ bits<12> src;
+ let Inst{15-12} = Rd;
+ let Inst{19-16} = 0b0000;
+ let Inst{11-8} = src{11-8};
+ let Inst{7} = 0;
+ let Inst{6-5} = src{6-5};
+ let Inst{4} = 1;
+ let Inst{3-0} = src{3-0};
+ let Inst{25} = 0;
+}
+
+def MOVsi : AsI1<0b1101, (outs GPR:$Rd), (ins shift_so_reg_imm:$src),
+ DPSoRegImmFrm, IIC_iMOVsr,
+ "mov", "\t$Rd, $src", [(set GPR:$Rd, shift_so_reg_imm:$src)]>,
UnaryDP {
bits<4> Rd;
bits<12> src;
let Inst{15-12} = Rd;
let Inst{19-16} = 0b0000;
- let Inst{11-0} = src;
+ let Inst{11-5} = src{11-5};
+ let Inst{4} = 0;
+ let Inst{3-0} = src{3-0};
let Inst{25} = 0;
}
@@ -2121,7 +2918,7 @@ def MOVi : AsI1<0b1101, (outs GPR:$Rd), (ins so_imm:$imm), DPFrm, IIC_iMOVi,
}
let isReMaterializable = 1, isAsCheapAsAMove = 1, isMoveImm = 1 in
-def MOVi16 : AI1<0b1000, (outs GPR:$Rd), (ins i32imm_hilo16:$imm),
+def MOVi16 : AI1<0b1000, (outs GPR:$Rd), (ins imm0_65535_expr:$imm),
DPFrm, IIC_iMOVi,
"movw", "\t$Rd, $imm",
[(set GPR:$Rd, imm0_65535:$imm)]>,
@@ -2133,16 +2930,22 @@ def MOVi16 : AI1<0b1000, (outs GPR:$Rd), (ins i32imm_hilo16:$imm),
let Inst{19-16} = imm{15-12};
let Inst{20} = 0;
let Inst{25} = 1;
+ let DecoderMethod = "DecodeArmMOVTWInstruction";
}
+def : InstAlias<"mov${p} $Rd, $imm",
+ (MOVi16 GPR:$Rd, imm0_65535_expr:$imm, pred:$p)>,
+ Requires<[IsARM]>;
+
def MOVi16_ga_pcrel : PseudoInst<(outs GPR:$Rd),
(ins i32imm:$addr, pclabel:$id), IIC_iMOVi, []>;
let Constraints = "$src = $Rd" in {
-def MOVTi16 : AI1<0b1010, (outs GPR:$Rd), (ins GPR:$src, i32imm_hilo16:$imm),
+def MOVTi16 : AI1<0b1010, (outs GPRnopc:$Rd),
+ (ins GPR:$src, imm0_65535_expr:$imm),
DPFrm, IIC_iMOVi,
"movt", "\t$Rd, $imm",
- [(set GPR:$Rd,
+ [(set GPRnopc:$Rd,
(or (and GPR:$src, 0xffff),
lo16AllZero:$imm))]>, UnaryDP,
Requires<[IsARM, HasV6T2]> {
@@ -2153,6 +2956,7 @@ def MOVTi16 : AI1<0b1010, (outs GPR:$Rd), (ins GPR:$src, i32imm_hilo16:$imm),
let Inst{19-16} = imm{15-12};
let Inst{20} = 0;
let Inst{25} = 1;
+ let DecoderMethod = "DecodeArmMOVTWInstruction";
}
def MOVTi16_ga_pcrel : PseudoInst<(outs GPR:$Rd),
@@ -2186,30 +2990,28 @@ def MOVsra_flag : PseudoInst<(outs GPR:$dst), (ins GPR:$src), IIC_iMOVsi,
// Sign extenders
-defm SXTB : AI_ext_rrot<0b01101010,
+def SXTB : AI_ext_rrot<0b01101010,
"sxtb", UnOpFrag<(sext_inreg node:$Src, i8)>>;
-defm SXTH : AI_ext_rrot<0b01101011,
+def SXTH : AI_ext_rrot<0b01101011,
"sxth", UnOpFrag<(sext_inreg node:$Src, i16)>>;
-defm SXTAB : AI_exta_rrot<0b01101010,
+def SXTAB : AI_exta_rrot<0b01101010,
"sxtab", BinOpFrag<(add node:$LHS, (sext_inreg node:$RHS, i8))>>;
-defm SXTAH : AI_exta_rrot<0b01101011,
+def SXTAH : AI_exta_rrot<0b01101011,
"sxtah", BinOpFrag<(add node:$LHS, (sext_inreg node:$RHS,i16))>>;
-// For disassembly only
-defm SXTB16 : AI_ext_rrot_np<0b01101000, "sxtb16">;
+def SXTB16 : AI_ext_rrot_np<0b01101000, "sxtb16">;
-// For disassembly only
-defm SXTAB16 : AI_exta_rrot_np<0b01101000, "sxtab16">;
+def SXTAB16 : AI_exta_rrot_np<0b01101000, "sxtab16">;
// Zero extenders
let AddedComplexity = 16 in {
-defm UXTB : AI_ext_rrot<0b01101110,
+def UXTB : AI_ext_rrot<0b01101110,
"uxtb" , UnOpFrag<(and node:$Src, 0x000000FF)>>;
-defm UXTH : AI_ext_rrot<0b01101111,
+def UXTH : AI_ext_rrot<0b01101111,
"uxth" , UnOpFrag<(and node:$Src, 0x0000FFFF)>>;
-defm UXTB16 : AI_ext_rrot<0b01101100,
+def UXTB16 : AI_ext_rrot<0b01101100,
"uxtb16", UnOpFrag<(and node:$Src, 0x00FF00FF)>>;
// FIXME: This pattern incorrectly assumes the shl operator is a rotate.
@@ -2217,23 +3019,22 @@ defm UXTB16 : AI_ext_rrot<0b01101100,
// instead so we can include a check for masking back in the upper
// eight bits of the source into the lower eight bits of the result.
//def : ARMV6Pat<(and (shl GPR:$Src, (i32 8)), 0xFF00FF),
-// (UXTB16r_rot GPR:$Src, 24)>;
+// (UXTB16r_rot GPR:$Src, 3)>;
def : ARMV6Pat<(and (srl GPR:$Src, (i32 8)), 0xFF00FF),
- (UXTB16r_rot GPR:$Src, 8)>;
+ (UXTB16 GPR:$Src, 1)>;
-defm UXTAB : AI_exta_rrot<0b01101110, "uxtab",
+def UXTAB : AI_exta_rrot<0b01101110, "uxtab",
BinOpFrag<(add node:$LHS, (and node:$RHS, 0x00FF))>>;
-defm UXTAH : AI_exta_rrot<0b01101111, "uxtah",
+def UXTAH : AI_exta_rrot<0b01101111, "uxtah",
BinOpFrag<(add node:$LHS, (and node:$RHS, 0xFFFF))>>;
}
// This isn't safe in general, the add is two 16-bit units, not a 32-bit add.
-// For disassembly only
-defm UXTAB16 : AI_exta_rrot_np<0b01101100, "uxtab16">;
+def UXTAB16 : AI_exta_rrot_np<0b01101100, "uxtab16">;
-def SBFX : I<(outs GPR:$Rd),
- (ins GPR:$Rn, imm0_31:$lsb, imm0_31_m1:$width),
+def SBFX : I<(outs GPRnopc:$Rd),
+ (ins GPRnopc:$Rn, imm0_31:$lsb, imm1_32:$width),
AddrMode1, 4, IndexModeNone, DPFrm, IIC_iUNAsi,
"sbfx", "\t$Rd, $Rn, $lsb, $width", "", []>,
Requires<[IsARM, HasV6T2]> {
@@ -2250,7 +3051,7 @@ def SBFX : I<(outs GPR:$Rd),
}
def UBFX : I<(outs GPR:$Rd),
- (ins GPR:$Rn, imm0_31:$lsb, imm0_31_m1:$width),
+ (ins GPR:$Rn, imm0_31:$lsb, imm1_32:$width),
AddrMode1, 4, IndexModeNone, DPFrm, IIC_iUNAsi,
"ubfx", "\t$Rd, $Rn, $lsb, $width", "", []>,
Requires<[IsARM, HasV6T2]> {
@@ -2278,148 +3079,58 @@ defm SUB : AsI1_bin_irs<0b0010, "sub",
BinOpFrag<(sub node:$LHS, node:$RHS)>, "SUB">;
// ADD and SUB with 's' bit set.
-defm ADDS : AI1_bin_s_irs<0b0100, "adds",
+//
+// Currently, t2ADDS/t2SUBS are pseudo opcodes that exist only in the
+// selection DAG. They are "lowered" to real t2ADD/t2SUB opcodes by
+// AdjustInstrPostInstrSelection where we determine whether or not to
+// set the "s" bit based on CPSR liveness.
+//
+// FIXME: Eliminate t2ADDS/t2SUBS pseudo opcodes after adding tablegen
+// support for an optional CPSR definition that corresponds to the DAG
+// node's second value. We can then eliminate the implicit def of CPSR.
+defm ADDS : AsI1_bin_s_irs<0b0100, "add",
IIC_iALUi, IIC_iALUr, IIC_iALUsr,
- BinOpFrag<(addc node:$LHS, node:$RHS)>, 1>;
-defm SUBS : AI1_bin_s_irs<0b0010, "subs",
+ BinOpFrag<(ARMaddc node:$LHS, node:$RHS)>, 1>;
+defm SUBS : AsI1_bin_s_irs<0b0010, "sub",
IIC_iALUi, IIC_iALUr, IIC_iALUsr,
- BinOpFrag<(subc node:$LHS, node:$RHS)>>;
+ BinOpFrag<(ARMsubc node:$LHS, node:$RHS)>>;
defm ADC : AI1_adde_sube_irs<0b0101, "adc",
- BinOpFrag<(adde_dead_carry node:$LHS, node:$RHS)>,
+ BinOpWithFlagFrag<(ARMadde node:$LHS, node:$RHS, node:$FLAG)>,
"ADC", 1>;
defm SBC : AI1_adde_sube_irs<0b0110, "sbc",
- BinOpFrag<(sube_dead_carry node:$LHS, node:$RHS)>,
+ BinOpWithFlagFrag<(ARMsube node:$LHS, node:$RHS, node:$FLAG)>,
"SBC">;
-// ADC and SUBC with 's' bit set.
-let usesCustomInserter = 1 in {
-defm ADCS : AI1_adde_sube_s_irs<
- BinOpFrag<(adde_live_carry node:$LHS, node:$RHS)>, 1>;
-defm SBCS : AI1_adde_sube_s_irs<
- BinOpFrag<(sube_live_carry node:$LHS, node:$RHS) >>;
-}
-
-def RSBri : AsI1<0b0011, (outs GPR:$Rd), (ins GPR:$Rn, so_imm:$imm), DPFrm,
- IIC_iALUi, "rsb", "\t$Rd, $Rn, $imm",
- [(set GPR:$Rd, (sub so_imm:$imm, GPR:$Rn))]> {
- bits<4> Rd;
- bits<4> Rn;
- bits<12> imm;
- let Inst{25} = 1;
- let Inst{15-12} = Rd;
- let Inst{19-16} = Rn;
- let Inst{11-0} = imm;
-}
-
-// The reg/reg form is only defined for the disassembler; for codegen it is
-// equivalent to SUBrr.
-def RSBrr : AsI1<0b0011, (outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm), DPFrm,
- IIC_iALUr, "rsb", "\t$Rd, $Rn, $Rm",
- [/* For disassembly only; pattern left blank */]> {
- bits<4> Rd;
- bits<4> Rn;
- bits<4> Rm;
- let Inst{11-4} = 0b00000000;
- let Inst{25} = 0;
- let Inst{3-0} = Rm;
- let Inst{15-12} = Rd;
- let Inst{19-16} = Rn;
-}
-
-def RSBrs : AsI1<0b0011, (outs GPR:$Rd), (ins GPR:$Rn, so_reg:$shift),
- DPSoRegFrm, IIC_iALUsr, "rsb", "\t$Rd, $Rn, $shift",
- [(set GPR:$Rd, (sub so_reg:$shift, GPR:$Rn))]> {
- bits<4> Rd;
- bits<4> Rn;
- bits<12> shift;
- let Inst{25} = 0;
- let Inst{11-0} = shift;
- let Inst{15-12} = Rd;
- let Inst{19-16} = Rn;
-}
+defm RSB : AsI1_rbin_irs <0b0011, "rsb",
+ IIC_iALUi, IIC_iALUr, IIC_iALUsr,
+ BinOpFrag<(sub node:$LHS, node:$RHS)>, "RSB">;
-// RSB with 's' bit set.
-// NOTE: CPSR def omitted because it will be handled by the custom inserter.
-let usesCustomInserter = 1 in {
-def RSBSri : ARMPseudoInst<(outs GPR:$Rd), (ins GPR:$Rn, so_imm:$imm),
- 4, IIC_iALUi,
- [(set GPR:$Rd, (subc so_imm:$imm, GPR:$Rn))]>;
-def RSBSrr : ARMPseudoInst<(outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm),
- 4, IIC_iALUr,
- [/* For disassembly only; pattern left blank */]>;
-def RSBSrs : ARMPseudoInst<(outs GPR:$Rd), (ins GPR:$Rn, so_reg:$shift),
- 4, IIC_iALUsr,
- [(set GPR:$Rd, (subc so_reg:$shift, GPR:$Rn))]>;
-}
-
-let Uses = [CPSR] in {
-def RSCri : AsI1<0b0111, (outs GPR:$Rd), (ins GPR:$Rn, so_imm:$imm),
- DPFrm, IIC_iALUi, "rsc", "\t$Rd, $Rn, $imm",
- [(set GPR:$Rd, (sube_dead_carry so_imm:$imm, GPR:$Rn))]>,
- Requires<[IsARM]> {
- bits<4> Rd;
- bits<4> Rn;
- bits<12> imm;
- let Inst{25} = 1;
- let Inst{15-12} = Rd;
- let Inst{19-16} = Rn;
- let Inst{11-0} = imm;
-}
-// The reg/reg form is only defined for the disassembler; for codegen it is
-// equivalent to SUBrr.
-def RSCrr : AsI1<0b0111, (outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm),
- DPFrm, IIC_iALUr, "rsc", "\t$Rd, $Rn, $Rm",
- [/* For disassembly only; pattern left blank */]> {
- bits<4> Rd;
- bits<4> Rn;
- bits<4> Rm;
- let Inst{11-4} = 0b00000000;
- let Inst{25} = 0;
- let Inst{3-0} = Rm;
- let Inst{15-12} = Rd;
- let Inst{19-16} = Rn;
-}
-def RSCrs : AsI1<0b0111, (outs GPR:$Rd), (ins GPR:$Rn, so_reg:$shift),
- DPSoRegFrm, IIC_iALUsr, "rsc", "\t$Rd, $Rn, $shift",
- [(set GPR:$Rd, (sube_dead_carry so_reg:$shift, GPR:$Rn))]>,
- Requires<[IsARM]> {
- bits<4> Rd;
- bits<4> Rn;
- bits<12> shift;
- let Inst{25} = 0;
- let Inst{11-0} = shift;
- let Inst{15-12} = Rd;
- let Inst{19-16} = Rn;
-}
-}
+// FIXME: Eliminate them if we can write def : Pat patterns which defines
+// CPSR and the implicit def of CPSR is not needed.
+defm RSBS : AsI1_rbin_s_is<0b0011, "rsb",
+ IIC_iALUi, IIC_iALUr, IIC_iALUsr,
+ BinOpFrag<(ARMsubc node:$LHS, node:$RHS)>>;
-// NOTE: CPSR def omitted because it will be handled by the custom inserter.
-let usesCustomInserter = 1, Uses = [CPSR] in {
-def RSCSri : ARMPseudoInst<(outs GPR:$Rd), (ins GPR:$Rn, so_imm:$imm),
- 4, IIC_iALUi,
- [(set GPR:$Rd, (sube_dead_carry so_imm:$imm, GPR:$Rn))]>;
-def RSCSrs : ARMPseudoInst<(outs GPR:$Rd), (ins GPR:$Rn, so_reg:$shift),
- 4, IIC_iALUsr,
- [(set GPR:$Rd, (sube_dead_carry so_reg:$shift, GPR:$Rn))]>;
-}
+defm RSC : AI1_rsc_irs<0b0111, "rsc",
+ BinOpWithFlagFrag<(ARMsube node:$LHS, node:$RHS, node:$FLAG)>,
+ "RSC">;
// (sub X, imm) gets canonicalized to (add X, -imm). Match this form.
// The assume-no-carry-in form uses the negation of the input since add/sub
// assume opposite meanings of the carry flag (i.e., carry == !borrow).
// See the definition of AddWithCarry() in the ARM ARM A2.2.1 for the gory
// details.
-def : ARMPat<(add GPR:$src, so_imm_neg:$imm),
- (SUBri GPR:$src, so_imm_neg:$imm)>;
-def : ARMPat<(addc GPR:$src, so_imm_neg:$imm),
- (SUBSri GPR:$src, so_imm_neg:$imm)>;
+def : ARMPat<(add GPR:$src, so_imm_neg:$imm),
+ (SUBri GPR:$src, so_imm_neg:$imm)>;
+def : ARMPat<(ARMaddc GPR:$src, so_imm_neg:$imm),
+ (SUBSri GPR:$src, so_imm_neg:$imm)>;
+
// The with-carry-in form matches bitwise not instead of the negation.
// Effectively, the inverse interpretation of the carry flag already accounts
// for part of the negation.
-def : ARMPat<(adde_dead_carry GPR:$src, so_imm_not:$imm),
- (SBCri GPR:$src, so_imm_not:$imm)>;
-def : ARMPat<(adde_live_carry GPR:$src, so_imm_not:$imm),
- (SBCSri GPR:$src, so_imm_not:$imm)>;
+def : ARMPat<(ARMadde GPR:$src, so_imm_not:$imm, CPSR),
+ (SBCri GPR:$src, so_imm_not:$imm)>;
// Note: These are implemented in C++ code, because they have to generate
// ADD/SUBrs instructions, which use a complex pattern that a xform function
@@ -2427,12 +3138,13 @@ def : ARMPat<(adde_live_carry GPR:$src, so_imm_not:$imm),
// (mul X, 2^n+1) -> (add (X << n), X)
// (mul X, 2^n-1) -> (rsb X, (X << n))
-// ARM Arithmetic Instruction -- for disassembly only
+// ARM Arithmetic Instruction
// GPR:$dst = GPR:$a op GPR:$b
class AAI<bits<8> op27_20, bits<8> op11_4, string opc,
- list<dag> pattern = [/* For disassembly only; pattern left blank */],
- dag iops = (ins GPR:$Rn, GPR:$Rm), string asm = "\t$Rd, $Rn, $Rm">
- : AI<(outs GPR:$Rd), iops, DPFrm, IIC_iALUr, opc, asm, pattern> {
+ list<dag> pattern = [],
+ dag iops = (ins GPRnopc:$Rn, GPRnopc:$Rm),
+ string asm = "\t$Rd, $Rn, $Rm">
+ : AI<(outs GPRnopc:$Rd), iops, DPFrm, IIC_iALUr, opc, asm, pattern> {
bits<4> Rn;
bits<4> Rd;
bits<4> Rm;
@@ -2443,17 +3155,19 @@ class AAI<bits<8> op27_20, bits<8> op11_4, string opc,
let Inst{3-0} = Rm;
}
-// Saturating add/subtract -- for disassembly only
+// Saturating add/subtract
def QADD : AAI<0b00010000, 0b00000101, "qadd",
- [(set GPR:$Rd, (int_arm_qadd GPR:$Rm, GPR:$Rn))],
- (ins GPR:$Rm, GPR:$Rn), "\t$Rd, $Rm, $Rn">;
+ [(set GPRnopc:$Rd, (int_arm_qadd GPRnopc:$Rm, GPRnopc:$Rn))],
+ (ins GPRnopc:$Rm, GPRnopc:$Rn), "\t$Rd, $Rm, $Rn">;
def QSUB : AAI<0b00010010, 0b00000101, "qsub",
- [(set GPR:$Rd, (int_arm_qsub GPR:$Rm, GPR:$Rn))],
- (ins GPR:$Rm, GPR:$Rn), "\t$Rd, $Rm, $Rn">;
-def QDADD : AAI<0b00010100, 0b00000101, "qdadd", [], (ins GPR:$Rm, GPR:$Rn),
+ [(set GPRnopc:$Rd, (int_arm_qsub GPRnopc:$Rm, GPRnopc:$Rn))],
+ (ins GPRnopc:$Rm, GPRnopc:$Rn), "\t$Rd, $Rm, $Rn">;
+def QDADD : AAI<0b00010100, 0b00000101, "qdadd", [],
+ (ins GPRnopc:$Rm, GPRnopc:$Rn),
"\t$Rd, $Rm, $Rn">;
-def QDSUB : AAI<0b00010110, 0b00000101, "qdsub", [], (ins GPR:$Rm, GPR:$Rn),
+def QDSUB : AAI<0b00010110, 0b00000101, "qdsub", [],
+ (ins GPRnopc:$Rm, GPRnopc:$Rn),
"\t$Rd, $Rm, $Rn">;
def QADD16 : AAI<0b01100010, 0b11110001, "qadd16">;
@@ -2469,7 +3183,7 @@ def UQSAX : AAI<0b01100110, 0b11110101, "uqsax">;
def UQSUB16 : AAI<0b01100110, 0b11110111, "uqsub16">;
def UQSUB8 : AAI<0b01100110, 0b11111111, "uqsub8">;
-// Signed/Unsigned add/subtract -- for disassembly only
+// Signed/Unsigned add/subtract
def SASX : AAI<0b01100001, 0b11110011, "sasx">;
def SADD16 : AAI<0b01100001, 0b11110001, "sadd16">;
@@ -2484,7 +3198,7 @@ def USAX : AAI<0b01100101, 0b11110101, "usax">;
def USUB16 : AAI<0b01100101, 0b11110111, "usub16">;
def USUB8 : AAI<0b01100101, 0b11111111, "usub8">;
-// Signed/Unsigned halving add/subtract -- for disassembly only
+// Signed/Unsigned halving add/subtract
def SHASX : AAI<0b01100011, 0b11110011, "shasx">;
def SHADD16 : AAI<0b01100011, 0b11110001, "shadd16">;
@@ -2499,7 +3213,7 @@ def UHSAX : AAI<0b01100111, 0b11110101, "uhsax">;
def UHSUB16 : AAI<0b01100111, 0b11110111, "uhsub16">;
def UHSUB8 : AAI<0b01100111, 0b11111111, "uhsub8">;
-// Unsigned Sum of Absolute Differences [and Accumulate] -- for disassembly only
+// Unsigned Sum of Absolute Differences [and Accumulate].
def USAD8 : AI<(outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm),
MulFrm /* for convenience */, NoItinerary, "usad8",
@@ -2531,11 +3245,11 @@ def USADA8 : AI<(outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm, GPR:$Ra),
let Inst{3-0} = Rn;
}
-// Signed/Unsigned saturate -- for disassembly only
+// Signed/Unsigned saturate
-def SSAT : AI<(outs GPR:$Rd), (ins ssat_imm:$sat_imm, GPR:$a, shift_imm:$sh),
- SatFrm, NoItinerary, "ssat", "\t$Rd, $sat_imm, $a$sh",
- [/* For disassembly only; pattern left blank */]> {
+def SSAT : AI<(outs GPRnopc:$Rd),
+ (ins imm1_32:$sat_imm, GPRnopc:$Rn, shift_imm:$sh),
+ SatFrm, NoItinerary, "ssat", "\t$Rd, $sat_imm, $Rn$sh", []> {
bits<4> Rd;
bits<5> sat_imm;
bits<4> Rn;
@@ -2544,14 +3258,14 @@ def SSAT : AI<(outs GPR:$Rd), (ins ssat_imm:$sat_imm, GPR:$a, shift_imm:$sh),
let Inst{5-4} = 0b01;
let Inst{20-16} = sat_imm;
let Inst{15-12} = Rd;
- let Inst{11-7} = sh{7-3};
- let Inst{6} = sh{0};
+ let Inst{11-7} = sh{4-0};
+ let Inst{6} = sh{5};
let Inst{3-0} = Rn;
}
-def SSAT16 : AI<(outs GPR:$Rd), (ins ssat_imm:$sat_imm, GPR:$Rn), SatFrm,
- NoItinerary, "ssat16", "\t$Rd, $sat_imm, $Rn",
- [/* For disassembly only; pattern left blank */]> {
+def SSAT16 : AI<(outs GPRnopc:$Rd),
+ (ins imm1_16:$sat_imm, GPRnopc:$Rn), SatFrm,
+ NoItinerary, "ssat16", "\t$Rd, $sat_imm, $Rn", []> {
bits<4> Rd;
bits<4> sat_imm;
bits<4> Rn;
@@ -2562,9 +3276,9 @@ def SSAT16 : AI<(outs GPR:$Rd), (ins ssat_imm:$sat_imm, GPR:$Rn), SatFrm,
let Inst{3-0} = Rn;
}
-def USAT : AI<(outs GPR:$Rd), (ins i32imm:$sat_imm, GPR:$a, shift_imm:$sh),
- SatFrm, NoItinerary, "usat", "\t$Rd, $sat_imm, $a$sh",
- [/* For disassembly only; pattern left blank */]> {
+def USAT : AI<(outs GPRnopc:$Rd),
+ (ins imm0_31:$sat_imm, GPRnopc:$Rn, shift_imm:$sh),
+ SatFrm, NoItinerary, "usat", "\t$Rd, $sat_imm, $Rn$sh", []> {
bits<4> Rd;
bits<5> sat_imm;
bits<4> Rn;
@@ -2572,15 +3286,15 @@ def USAT : AI<(outs GPR:$Rd), (ins i32imm:$sat_imm, GPR:$a, shift_imm:$sh),
let Inst{27-21} = 0b0110111;
let Inst{5-4} = 0b01;
let Inst{15-12} = Rd;
- let Inst{11-7} = sh{7-3};
- let Inst{6} = sh{0};
+ let Inst{11-7} = sh{4-0};
+ let Inst{6} = sh{5};
let Inst{20-16} = sat_imm;
let Inst{3-0} = Rn;
}
-def USAT16 : AI<(outs GPR:$Rd), (ins i32imm:$sat_imm, GPR:$a), SatFrm,
- NoItinerary, "usat16", "\t$Rd, $sat_imm, $a",
- [/* For disassembly only; pattern left blank */]> {
+def USAT16 : AI<(outs GPRnopc:$Rd),
+ (ins imm0_15:$sat_imm, GPRnopc:$Rn), SatFrm,
+ NoItinerary, "usat16", "\t$Rd, $sat_imm, $Rn", []> {
bits<4> Rd;
bits<4> sat_imm;
bits<4> Rn;
@@ -2591,8 +3305,10 @@ def USAT16 : AI<(outs GPR:$Rd), (ins i32imm:$sat_imm, GPR:$a), SatFrm,
let Inst{3-0} = Rn;
}
-def : ARMV6Pat<(int_arm_ssat GPR:$a, imm:$pos), (SSAT imm:$pos, GPR:$a, 0)>;
-def : ARMV6Pat<(int_arm_usat GPR:$a, imm:$pos), (USAT imm:$pos, GPR:$a, 0)>;
+def : ARMV6Pat<(int_arm_ssat GPRnopc:$a, imm:$pos),
+ (SSAT imm:$pos, GPRnopc:$a, 0)>;
+def : ARMV6Pat<(int_arm_usat GPRnopc:$a, imm:$pos),
+ (USAT imm:$pos, GPRnopc:$a, 0)>;
//===----------------------------------------------------------------------===//
// Bitwise Instructions.
@@ -2611,6 +3327,10 @@ defm BIC : AsI1_bin_irs<0b1110, "bic",
IIC_iBITi, IIC_iBITr, IIC_iBITsr,
BinOpFrag<(and node:$LHS, (not node:$RHS))>, "BIC">;
+// FIXME: bf_inv_mask_imm should be two operands, the lsb and the msb, just
+// like in the actual instruction encoding. The complexity of mapping the mask
+// to the lsb/msb pair should be handled by ISel, not encapsulated in the
+// instruction description.
def BFC : I<(outs GPR:$Rd), (ins GPR:$src, bf_inv_mask_imm:$imm),
AddrMode1, 4, IndexModeNone, DPFrm, IIC_iUNAsi,
"bfc", "\t$Rd, $imm", "$src = $Rd",
@@ -2622,16 +3342,16 @@ def BFC : I<(outs GPR:$Rd), (ins GPR:$src, bf_inv_mask_imm:$imm),
let Inst{6-0} = 0b0011111;
let Inst{15-12} = Rd;
let Inst{11-7} = imm{4-0}; // lsb
- let Inst{20-16} = imm{9-5}; // width
+ let Inst{20-16} = imm{9-5}; // msb
}
// A8.6.18 BFI - Bitfield insert (Encoding A1)
-def BFI : I<(outs GPR:$Rd), (ins GPR:$src, GPR:$Rn, bf_inv_mask_imm:$imm),
- AddrMode1, 4, IndexModeNone, DPFrm, IIC_iUNAsi,
- "bfi", "\t$Rd, $Rn, $imm", "$src = $Rd",
- [(set GPR:$Rd, (ARMbfi GPR:$src, GPR:$Rn,
- bf_inv_mask_imm:$imm))]>,
- Requires<[IsARM, HasV6T2]> {
+def BFI:I<(outs GPRnopc:$Rd), (ins GPRnopc:$src, GPR:$Rn, bf_inv_mask_imm:$imm),
+ AddrMode1, 4, IndexModeNone, DPFrm, IIC_iUNAsi,
+ "bfi", "\t$Rd, $Rn, $imm", "$src = $Rd",
+ [(set GPRnopc:$Rd, (ARMbfi GPRnopc:$src, GPR:$Rn,
+ bf_inv_mask_imm:$imm))]>,
+ Requires<[IsARM, HasV6T2]> {
bits<4> Rd;
bits<4> Rn;
bits<10> imm;
@@ -2643,25 +3363,6 @@ def BFI : I<(outs GPR:$Rd), (ins GPR:$src, GPR:$Rn, bf_inv_mask_imm:$imm),
let Inst{3-0} = Rn;
}
-// GNU as only supports this form of bfi (w/ 4 arguments)
-let isAsmParserOnly = 1 in
-def BFI4p : I<(outs GPR:$Rd), (ins GPR:$src, GPR:$Rn,
- lsb_pos_imm:$lsb, width_imm:$width),
- AddrMode1, 4, IndexModeNone, DPFrm, IIC_iUNAsi,
- "bfi", "\t$Rd, $Rn, $lsb, $width", "$src = $Rd",
- []>, Requires<[IsARM, HasV6T2]> {
- bits<4> Rd;
- bits<4> Rn;
- bits<5> lsb;
- bits<5> width;
- let Inst{27-21} = 0b0111110;
- let Inst{6-4} = 0b001; // Rn: Inst{3-0} != 15
- let Inst{15-12} = Rd;
- let Inst{11-7} = lsb;
- let Inst{20-16} = width; // Custom encoder => lsb+width-1
- let Inst{3-0} = Rn;
-}
-
def MVNr : AsI1<0b1111, (outs GPR:$Rd), (ins GPR:$Rm), DPFrm, IIC_iMVNr,
"mvn", "\t$Rd, $Rm",
[(set GPR:$Rd, (not GPR:$Rm))]>, UnaryDP {
@@ -2673,15 +3374,31 @@ def MVNr : AsI1<0b1111, (outs GPR:$Rd), (ins GPR:$Rm), DPFrm, IIC_iMVNr,
let Inst{15-12} = Rd;
let Inst{3-0} = Rm;
}
-def MVNs : AsI1<0b1111, (outs GPR:$Rd), (ins so_reg:$shift), DPSoRegFrm,
- IIC_iMVNsr, "mvn", "\t$Rd, $shift",
- [(set GPR:$Rd, (not so_reg:$shift))]>, UnaryDP {
+def MVNsi : AsI1<0b1111, (outs GPR:$Rd), (ins so_reg_imm:$shift),
+ DPSoRegImmFrm, IIC_iMVNsr, "mvn", "\t$Rd, $shift",
+ [(set GPR:$Rd, (not so_reg_imm:$shift))]>, UnaryDP {
+ bits<4> Rd;
+ bits<12> shift;
+ let Inst{25} = 0;
+ let Inst{19-16} = 0b0000;
+ let Inst{15-12} = Rd;
+ let Inst{11-5} = shift{11-5};
+ let Inst{4} = 0;
+ let Inst{3-0} = shift{3-0};
+}
+def MVNsr : AsI1<0b1111, (outs GPR:$Rd), (ins so_reg_reg:$shift),
+ DPSoRegRegFrm, IIC_iMVNsr, "mvn", "\t$Rd, $shift",
+ [(set GPR:$Rd, (not so_reg_reg:$shift))]>, UnaryDP {
bits<4> Rd;
bits<12> shift;
let Inst{25} = 0;
let Inst{19-16} = 0b0000;
let Inst{15-12} = Rd;
- let Inst{11-0} = shift;
+ let Inst{11-8} = shift{11-8};
+ let Inst{7} = 0;
+ let Inst{6-5} = shift{6-5};
+ let Inst{4} = 1;
+ let Inst{3-0} = shift{3-0};
}
let isReMaterializable = 1, isAsCheapAsAMove = 1, isMoveImm = 1 in
def MVNi : AsI1<0b1111, (outs GPR:$Rd), (ins so_imm:$imm), DPFrm,
@@ -2820,8 +3537,8 @@ def UMAAL : AMul1I <0b0000010, (outs GPR:$RdLo, GPR:$RdHi),
bits<4> RdHi;
bits<4> Rm;
bits<4> Rn;
- let Inst{19-16} = RdLo;
- let Inst{15-12} = RdHi;
+ let Inst{19-16} = RdHi;
+ let Inst{15-12} = RdLo;
let Inst{11-8} = Rm;
let Inst{3-0} = Rn;
}
@@ -2855,8 +3572,7 @@ def SMMUL : AMul2I <0b0111010, 0b0001, (outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm),
}
def SMMULR : AMul2I <0b0111010, 0b0011, (outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm),
- IIC_iMUL32, "smmulr", "\t$Rd, $Rn, $Rm",
- [/* For disassembly only; pattern left blank */]>,
+ IIC_iMUL32, "smmulr", "\t$Rd, $Rn, $Rm", []>,
Requires<[IsARM, HasV6]> {
let Inst{15-12} = 0b1111;
}
@@ -2869,8 +3585,7 @@ def SMMLA : AMul2Ia <0b0111010, 0b0001, (outs GPR:$Rd),
def SMMLAR : AMul2Ia <0b0111010, 0b0011, (outs GPR:$Rd),
(ins GPR:$Rn, GPR:$Rm, GPR:$Ra),
- IIC_iMAC32, "smmlar", "\t$Rd, $Rn, $Rm, $Ra",
- [/* For disassembly only; pattern left blank */]>,
+ IIC_iMAC32, "smmlar", "\t$Rd, $Rn, $Rm, $Ra", []>,
Requires<[IsARM, HasV6]>;
def SMMLS : AMul2Ia <0b0111010, 0b1101, (outs GPR:$Rd),
@@ -2881,8 +3596,7 @@ def SMMLS : AMul2Ia <0b0111010, 0b1101, (outs GPR:$Rd),
def SMMLSR : AMul2Ia <0b0111010, 0b1111, (outs GPR:$Rd),
(ins GPR:$Rn, GPR:$Rm, GPR:$Ra),
- IIC_iMAC32, "smmlsr", "\t$Rd, $Rn, $Rm, $Ra",
- [/* For disassembly only; pattern left blank */]>,
+ IIC_iMAC32, "smmlsr", "\t$Rd, $Rn, $Rm, $Ra", []>,
Requires<[IsARM, HasV6]>;
multiclass AI_smul<string opc, PatFrag opnode> {
@@ -2925,92 +3639,95 @@ multiclass AI_smul<string opc, PatFrag opnode> {
multiclass AI_smla<string opc, PatFrag opnode> {
- def BB : AMulxyIa<0b0001000, 0b00, (outs GPR:$Rd),
- (ins GPR:$Rn, GPR:$Rm, GPR:$Ra),
+ let DecoderMethod = "DecodeSMLAInstruction" in {
+ def BB : AMulxyIa<0b0001000, 0b00, (outs GPRnopc:$Rd),
+ (ins GPRnopc:$Rn, GPRnopc:$Rm, GPR:$Ra),
IIC_iMAC16, !strconcat(opc, "bb"), "\t$Rd, $Rn, $Rm, $Ra",
- [(set GPR:$Rd, (add GPR:$Ra,
- (opnode (sext_inreg GPR:$Rn, i16),
- (sext_inreg GPR:$Rm, i16))))]>,
+ [(set GPRnopc:$Rd, (add GPR:$Ra,
+ (opnode (sext_inreg GPRnopc:$Rn, i16),
+ (sext_inreg GPRnopc:$Rm, i16))))]>,
Requires<[IsARM, HasV5TE]>;
- def BT : AMulxyIa<0b0001000, 0b10, (outs GPR:$Rd),
- (ins GPR:$Rn, GPR:$Rm, GPR:$Ra),
+ def BT : AMulxyIa<0b0001000, 0b10, (outs GPRnopc:$Rd),
+ (ins GPRnopc:$Rn, GPRnopc:$Rm, GPR:$Ra),
IIC_iMAC16, !strconcat(opc, "bt"), "\t$Rd, $Rn, $Rm, $Ra",
- [(set GPR:$Rd, (add GPR:$Ra, (opnode (sext_inreg GPR:$Rn, i16),
- (sra GPR:$Rm, (i32 16)))))]>,
+ [(set GPRnopc:$Rd,
+ (add GPR:$Ra, (opnode (sext_inreg GPRnopc:$Rn, i16),
+ (sra GPRnopc:$Rm, (i32 16)))))]>,
Requires<[IsARM, HasV5TE]>;
- def TB : AMulxyIa<0b0001000, 0b01, (outs GPR:$Rd),
- (ins GPR:$Rn, GPR:$Rm, GPR:$Ra),
+ def TB : AMulxyIa<0b0001000, 0b01, (outs GPRnopc:$Rd),
+ (ins GPRnopc:$Rn, GPRnopc:$Rm, GPR:$Ra),
IIC_iMAC16, !strconcat(opc, "tb"), "\t$Rd, $Rn, $Rm, $Ra",
- [(set GPR:$Rd, (add GPR:$Ra, (opnode (sra GPR:$Rn, (i32 16)),
- (sext_inreg GPR:$Rm, i16))))]>,
+ [(set GPRnopc:$Rd,
+ (add GPR:$Ra, (opnode (sra GPRnopc:$Rn, (i32 16)),
+ (sext_inreg GPRnopc:$Rm, i16))))]>,
Requires<[IsARM, HasV5TE]>;
- def TT : AMulxyIa<0b0001000, 0b11, (outs GPR:$Rd),
- (ins GPR:$Rn, GPR:$Rm, GPR:$Ra),
+ def TT : AMulxyIa<0b0001000, 0b11, (outs GPRnopc:$Rd),
+ (ins GPRnopc:$Rn, GPRnopc:$Rm, GPR:$Ra),
IIC_iMAC16, !strconcat(opc, "tt"), "\t$Rd, $Rn, $Rm, $Ra",
- [(set GPR:$Rd, (add GPR:$Ra, (opnode (sra GPR:$Rn, (i32 16)),
- (sra GPR:$Rm, (i32 16)))))]>,
+ [(set GPRnopc:$Rd,
+ (add GPR:$Ra, (opnode (sra GPRnopc:$Rn, (i32 16)),
+ (sra GPRnopc:$Rm, (i32 16)))))]>,
Requires<[IsARM, HasV5TE]>;
- def WB : AMulxyIa<0b0001001, 0b00, (outs GPR:$Rd),
- (ins GPR:$Rn, GPR:$Rm, GPR:$Ra),
+ def WB : AMulxyIa<0b0001001, 0b00, (outs GPRnopc:$Rd),
+ (ins GPRnopc:$Rn, GPRnopc:$Rm, GPR:$Ra),
IIC_iMAC16, !strconcat(opc, "wb"), "\t$Rd, $Rn, $Rm, $Ra",
- [(set GPR:$Rd, (add GPR:$Ra, (sra (opnode GPR:$Rn,
- (sext_inreg GPR:$Rm, i16)), (i32 16))))]>,
+ [(set GPRnopc:$Rd,
+ (add GPR:$Ra, (sra (opnode GPRnopc:$Rn,
+ (sext_inreg GPRnopc:$Rm, i16)), (i32 16))))]>,
Requires<[IsARM, HasV5TE]>;
- def WT : AMulxyIa<0b0001001, 0b10, (outs GPR:$Rd),
- (ins GPR:$Rn, GPR:$Rm, GPR:$Ra),
+ def WT : AMulxyIa<0b0001001, 0b10, (outs GPRnopc:$Rd),
+ (ins GPRnopc:$Rn, GPRnopc:$Rm, GPR:$Ra),
IIC_iMAC16, !strconcat(opc, "wt"), "\t$Rd, $Rn, $Rm, $Ra",
- [(set GPR:$Rd, (add GPR:$Ra, (sra (opnode GPR:$Rn,
- (sra GPR:$Rm, (i32 16))), (i32 16))))]>,
+ [(set GPRnopc:$Rd,
+ (add GPR:$Ra, (sra (opnode GPRnopc:$Rn,
+ (sra GPRnopc:$Rm, (i32 16))), (i32 16))))]>,
Requires<[IsARM, HasV5TE]>;
+ }
}
defm SMUL : AI_smul<"smul", BinOpFrag<(mul node:$LHS, node:$RHS)>>;
defm SMLA : AI_smla<"smla", BinOpFrag<(mul node:$LHS, node:$RHS)>>;
-// Halfword multiply accumulate long: SMLAL<x><y> -- for disassembly only
-def SMLALBB : AMulxyI64<0b0001010, 0b00, (outs GPR:$RdLo, GPR:$RdHi),
- (ins GPR:$Rn, GPR:$Rm),
- IIC_iMAC64, "smlalbb", "\t$RdLo, $RdHi, $Rn, $Rm",
- [/* For disassembly only; pattern left blank */]>,
+// Halfword multiply accumulate long: SMLAL<x><y>.
+def SMLALBB : AMulxyI64<0b0001010, 0b00, (outs GPRnopc:$RdLo, GPRnopc:$RdHi),
+ (ins GPRnopc:$Rn, GPRnopc:$Rm),
+ IIC_iMAC64, "smlalbb", "\t$RdLo, $RdHi, $Rn, $Rm", []>,
Requires<[IsARM, HasV5TE]>;
-def SMLALBT : AMulxyI64<0b0001010, 0b10, (outs GPR:$RdLo, GPR:$RdHi),
- (ins GPR:$Rn, GPR:$Rm),
- IIC_iMAC64, "smlalbt", "\t$RdLo, $RdHi, $Rn, $Rm",
- [/* For disassembly only; pattern left blank */]>,
+def SMLALBT : AMulxyI64<0b0001010, 0b10, (outs GPRnopc:$RdLo, GPRnopc:$RdHi),
+ (ins GPRnopc:$Rn, GPRnopc:$Rm),
+ IIC_iMAC64, "smlalbt", "\t$RdLo, $RdHi, $Rn, $Rm", []>,
Requires<[IsARM, HasV5TE]>;
-def SMLALTB : AMulxyI64<0b0001010, 0b01, (outs GPR:$RdLo, GPR:$RdHi),
- (ins GPR:$Rn, GPR:$Rm),
- IIC_iMAC64, "smlaltb", "\t$RdLo, $RdHi, $Rn, $Rm",
- [/* For disassembly only; pattern left blank */]>,
+def SMLALTB : AMulxyI64<0b0001010, 0b01, (outs GPRnopc:$RdLo, GPRnopc:$RdHi),
+ (ins GPRnopc:$Rn, GPRnopc:$Rm),
+ IIC_iMAC64, "smlaltb", "\t$RdLo, $RdHi, $Rn, $Rm", []>,
Requires<[IsARM, HasV5TE]>;
-def SMLALTT : AMulxyI64<0b0001010, 0b11, (outs GPR:$RdLo, GPR:$RdHi),
- (ins GPR:$Rn, GPR:$Rm),
- IIC_iMAC64, "smlaltt", "\t$RdLo, $RdHi, $Rn, $Rm",
- [/* For disassembly only; pattern left blank */]>,
+def SMLALTT : AMulxyI64<0b0001010, 0b11, (outs GPRnopc:$RdLo, GPRnopc:$RdHi),
+ (ins GPRnopc:$Rn, GPRnopc:$Rm),
+ IIC_iMAC64, "smlaltt", "\t$RdLo, $RdHi, $Rn, $Rm", []>,
Requires<[IsARM, HasV5TE]>;
-// Helper class for AI_smld -- for disassembly only
+// Helper class for AI_smld.
class AMulDualIbase<bit long, bit sub, bit swap, dag oops, dag iops,
InstrItinClass itin, string opc, string asm>
: AI<oops, iops, MulFrm, itin, opc, asm, []>, Requires<[IsARM, HasV6]> {
bits<4> Rn;
bits<4> Rm;
- let Inst{4} = 1;
- let Inst{5} = swap;
- let Inst{6} = sub;
- let Inst{7} = 0;
- let Inst{21-20} = 0b00;
- let Inst{22} = long;
let Inst{27-23} = 0b01110;
+ let Inst{22} = long;
+ let Inst{21-20} = 0b00;
let Inst{11-8} = Rm;
+ let Inst{7} = 0;
+ let Inst{6} = sub;
+ let Inst{5} = swap;
+ let Inst{4} = 1;
let Inst{3-0} = Rn;
}
class AMulDualI<bit long, bit sub, bit swap, dag oops, dag iops,
@@ -3024,6 +3741,8 @@ class AMulDualIa<bit long, bit sub, bit swap, dag oops, dag iops,
InstrItinClass itin, string opc, string asm>
: AMulDualIbase<long, sub, swap, oops, iops, itin, opc, asm> {
bits<4> Ra;
+ bits<4> Rd;
+ let Inst{19-16} = Rd;
let Inst{15-12} = Ra;
}
class AMulDualI64<bit long, bit sub, bit swap, dag oops, dag iops,
@@ -3037,18 +3756,20 @@ class AMulDualI64<bit long, bit sub, bit swap, dag oops, dag iops,
multiclass AI_smld<bit sub, string opc> {
- def D : AMulDualIa<0, sub, 0, (outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm, GPR:$Ra),
+ def D : AMulDualIa<0, sub, 0, (outs GPRnopc:$Rd),
+ (ins GPRnopc:$Rn, GPRnopc:$Rm, GPR:$Ra),
NoItinerary, !strconcat(opc, "d"), "\t$Rd, $Rn, $Rm, $Ra">;
- def DX: AMulDualIa<0, sub, 1, (outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm, GPR:$Ra),
+ def DX: AMulDualIa<0, sub, 1, (outs GPRnopc:$Rd),
+ (ins GPRnopc:$Rn, GPRnopc:$Rm, GPR:$Ra),
NoItinerary, !strconcat(opc, "dx"), "\t$Rd, $Rn, $Rm, $Ra">;
- def LD: AMulDualI64<1, sub, 0, (outs GPR:$RdLo,GPR:$RdHi),
- (ins GPR:$Rn, GPR:$Rm), NoItinerary,
+ def LD: AMulDualI64<1, sub, 0, (outs GPRnopc:$RdLo, GPRnopc:$RdHi),
+ (ins GPRnopc:$Rn, GPRnopc:$Rm), NoItinerary,
!strconcat(opc, "ld"), "\t$RdLo, $RdHi, $Rn, $Rm">;
- def LDX : AMulDualI64<1, sub, 1, (outs GPR:$RdLo,GPR:$RdHi),
- (ins GPR:$Rn, GPR:$Rm), NoItinerary,
+ def LDX : AMulDualI64<1, sub, 1, (outs GPRnopc:$RdLo, GPRnopc:$RdHi),
+ (ins GPRnopc:$Rn, GPRnopc:$Rm), NoItinerary,
!strconcat(opc, "ldx"),"\t$RdLo, $RdHi, $Rn, $Rm">;
}
@@ -3058,10 +3779,10 @@ defm SMLS : AI_smld<1, "smls">;
multiclass AI_sdml<bit sub, string opc> {
- def D : AMulDualI<0, sub, 0, (outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm),
- NoItinerary, !strconcat(opc, "d"), "\t$Rd, $Rn, $Rm">;
- def DX : AMulDualI<0, sub, 1, (outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm),
- NoItinerary, !strconcat(opc, "dx"), "\t$Rd, $Rn, $Rm">;
+ def D:AMulDualI<0, sub, 0, (outs GPRnopc:$Rd), (ins GPRnopc:$Rn, GPRnopc:$Rm),
+ NoItinerary, !strconcat(opc, "d"), "\t$Rd, $Rn, $Rm">;
+ def DX:AMulDualI<0, sub, 1, (outs GPRnopc:$Rd),(ins GPRnopc:$Rn, GPRnopc:$Rm),
+ NoItinerary, !strconcat(opc, "dx"), "\t$Rd, $Rn, $Rm">;
}
defm SMUA : AI_sdml<0, "smua">;
@@ -3100,55 +3821,38 @@ def : ARMV6Pat<(or (sra (shl GPR:$Rm, (i32 24)), (i32 16)),
(and (srl GPR:$Rm, (i32 8)), 0xFF)),
(REVSH GPR:$Rm)>;
-def lsl_shift_imm : SDNodeXForm<imm, [{
- unsigned Sh = ARM_AM::getSORegOpc(ARM_AM::lsl, N->getZExtValue());
- return CurDAG->getTargetConstant(Sh, MVT::i32);
-}]>;
-
-def lsl_amt : ImmLeaf<i32, [{
- return Imm > 0 && Imm < 32;
-}], lsl_shift_imm>;
-
-def PKHBT : APKHI<0b01101000, 0, (outs GPR:$Rd),
- (ins GPR:$Rn, GPR:$Rm, shift_imm:$sh),
+def PKHBT : APKHI<0b01101000, 0, (outs GPRnopc:$Rd),
+ (ins GPRnopc:$Rn, GPRnopc:$Rm, pkh_lsl_amt:$sh),
IIC_iALUsi, "pkhbt", "\t$Rd, $Rn, $Rm$sh",
- [(set GPR:$Rd, (or (and GPR:$Rn, 0xFFFF),
- (and (shl GPR:$Rm, lsl_amt:$sh),
- 0xFFFF0000)))]>,
+ [(set GPRnopc:$Rd, (or (and GPRnopc:$Rn, 0xFFFF),
+ (and (shl GPRnopc:$Rm, pkh_lsl_amt:$sh),
+ 0xFFFF0000)))]>,
Requires<[IsARM, HasV6]>;
// Alternate cases for PKHBT where identities eliminate some nodes.
-def : ARMV6Pat<(or (and GPR:$Rn, 0xFFFF), (and GPR:$Rm, 0xFFFF0000)),
- (PKHBT GPR:$Rn, GPR:$Rm, 0)>;
-def : ARMV6Pat<(or (and GPR:$Rn, 0xFFFF), (shl GPR:$Rm, imm16_31:$sh)),
- (PKHBT GPR:$Rn, GPR:$Rm, (lsl_shift_imm imm16_31:$sh))>;
-
-def asr_shift_imm : SDNodeXForm<imm, [{
- unsigned Sh = ARM_AM::getSORegOpc(ARM_AM::asr, N->getZExtValue());
- return CurDAG->getTargetConstant(Sh, MVT::i32);
-}]>;
-
-def asr_amt : ImmLeaf<i32, [{
- return Imm > 0 && Imm <= 32;
-}], asr_shift_imm>;
+def : ARMV6Pat<(or (and GPRnopc:$Rn, 0xFFFF), (and GPRnopc:$Rm, 0xFFFF0000)),
+ (PKHBT GPRnopc:$Rn, GPRnopc:$Rm, 0)>;
+def : ARMV6Pat<(or (and GPRnopc:$Rn, 0xFFFF), (shl GPRnopc:$Rm, imm16_31:$sh)),
+ (PKHBT GPRnopc:$Rn, GPRnopc:$Rm, imm16_31:$sh)>;
// Note: Shifts of 1-15 bits will be transformed to srl instead of sra and
// will match the pattern below.
-def PKHTB : APKHI<0b01101000, 1, (outs GPR:$Rd),
- (ins GPR:$Rn, GPR:$Rm, shift_imm:$sh),
+def PKHTB : APKHI<0b01101000, 1, (outs GPRnopc:$Rd),
+ (ins GPRnopc:$Rn, GPRnopc:$Rm, pkh_asr_amt:$sh),
IIC_iBITsi, "pkhtb", "\t$Rd, $Rn, $Rm$sh",
- [(set GPR:$Rd, (or (and GPR:$Rn, 0xFFFF0000),
- (and (sra GPR:$Rm, asr_amt:$sh),
- 0xFFFF)))]>,
+ [(set GPRnopc:$Rd, (or (and GPRnopc:$Rn, 0xFFFF0000),
+ (and (sra GPRnopc:$Rm, pkh_asr_amt:$sh),
+ 0xFFFF)))]>,
Requires<[IsARM, HasV6]>;
// Alternate cases for PKHTB where identities eliminate some nodes. Note that
// a shift amount of 0 is *not legal* here, it is PKHBT instead.
-def : ARMV6Pat<(or (and GPR:$src1, 0xFFFF0000), (srl GPR:$src2, imm16_31:$sh)),
- (PKHTB GPR:$src1, GPR:$src2, (asr_shift_imm imm16_31:$sh))>;
-def : ARMV6Pat<(or (and GPR:$src1, 0xFFFF0000),
- (and (srl GPR:$src2, imm1_15:$sh), 0xFFFF)),
- (PKHTB GPR:$src1, GPR:$src2, (asr_shift_imm imm1_15:$sh))>;
+def : ARMV6Pat<(or (and GPRnopc:$src1, 0xFFFF0000),
+ (srl GPRnopc:$src2, imm16_31:$sh)),
+ (PKHTB GPRnopc:$src1, GPRnopc:$src2, imm16_31:$sh)>;
+def : ARMV6Pat<(or (and GPRnopc:$src1, 0xFFFF0000),
+ (and (srl GPRnopc:$src2, imm1_15:$sh), 0xFFFF)),
+ (PKHTB GPRnopc:$src1, GPRnopc:$src2, imm1_15:$sh)>;
//===----------------------------------------------------------------------===//
// Comparison Instructions...
@@ -3163,8 +3867,10 @@ def : ARMPat<(ARMcmpZ GPR:$src, so_imm:$imm),
(CMPri GPR:$src, so_imm:$imm)>;
def : ARMPat<(ARMcmpZ GPR:$src, GPR:$rhs),
(CMPrr GPR:$src, GPR:$rhs)>;
-def : ARMPat<(ARMcmpZ GPR:$src, so_reg:$rhs),
- (CMPrs GPR:$src, so_reg:$rhs)>;
+def : ARMPat<(ARMcmpZ GPR:$src, so_reg_imm:$rhs),
+ (CMPrsi GPR:$src, so_reg_imm:$rhs)>;
+def : ARMPat<(ARMcmpZ GPR:$src, so_reg_reg:$rhs),
+ (CMPrsr GPR:$src, so_reg_reg:$rhs)>;
// FIXME: We have to be careful when using the CMN instruction and comparison
// with 0. One would expect these two pieces of code should give identical
@@ -3250,15 +3956,23 @@ def MOVCCr : ARMPseudoInst<(outs GPR:$Rd), (ins GPR:$false, GPR:$Rm, pred:$p),
4, IIC_iCMOVr,
[/*(set GPR:$Rd, (ARMcmov GPR:$false, GPR:$Rm, imm:$cc, CCR:$ccr))*/]>,
RegConstraint<"$false = $Rd">;
-def MOVCCs : ARMPseudoInst<(outs GPR:$Rd),
- (ins GPR:$false, so_reg:$shift, pred:$p),
+def MOVCCsi : ARMPseudoInst<(outs GPR:$Rd),
+ (ins GPR:$false, so_reg_imm:$shift, pred:$p),
+ 4, IIC_iCMOVsr,
+ [/*(set GPR:$Rd, (ARMcmov GPR:$false, so_reg_imm:$shift,
+ imm:$cc, CCR:$ccr))*/]>,
+ RegConstraint<"$false = $Rd">;
+def MOVCCsr : ARMPseudoInst<(outs GPR:$Rd),
+ (ins GPR:$false, so_reg_reg:$shift, pred:$p),
4, IIC_iCMOVsr,
- [/*(set GPR:$Rd, (ARMcmov GPR:$false, so_reg:$shift, imm:$cc, CCR:$ccr))*/]>,
+ [/*(set GPR:$Rd, (ARMcmov GPR:$false, so_reg_reg:$shift,
+ imm:$cc, CCR:$ccr))*/]>,
RegConstraint<"$false = $Rd">;
+
let isMoveImm = 1 in
def MOVCCi16 : ARMPseudoInst<(outs GPR:$Rd),
- (ins GPR:$false, i32imm_hilo16:$imm, pred:$p),
+ (ins GPR:$false, imm0_65535_expr:$imm, pred:$p),
4, IIC_iMOVi,
[]>,
RegConstraint<"$false = $Rd">, Requires<[IsARM, HasV6T2]>;
@@ -3288,9 +4002,14 @@ def MVNCCi : ARMPseudoInst<(outs GPR:$Rd),
// Atomic operations intrinsics
//
+def MemBarrierOptOperand : AsmOperandClass {
+ let Name = "MemBarrierOpt";
+ let ParserMethod = "parseMemBarrierOptOperand";
+}
def memb_opt : Operand<i32> {
let PrintMethod = "printMemBOption";
let ParserMatchClass = MemBarrierOptOperand;
+ let DecoderMethod = "DecodeMemBarrierOption";
}
// memory barriers protect the atomic sequences
@@ -3321,8 +4040,16 @@ def ISB : AInoP<(outs), (ins memb_opt:$opt), MiscFrm, NoItinerary,
let Inst{3-0} = opt;
}
+// Pseudo isntruction that combines movs + predicated rsbmi
+// to implement integer ABS
+let usesCustomInserter = 1, Defs = [CPSR] in {
+def ABS : ARMPseudoInst<
+ (outs GPR:$dst), (ins GPR:$src),
+ 8, NoItinerary, []>;
+}
+
let usesCustomInserter = 1 in {
- let Uses = [CPSR] in {
+ let Defs = [CPSR] in {
def ATOMIC_LOAD_ADD_I8 : PseudoInst<
(outs GPR:$dst), (ins GPR:$ptr, GPR:$incr), NoItinerary,
[(set GPR:$dst, (atomic_load_add_8 GPR:$ptr, GPR:$incr))]>;
@@ -3437,44 +4164,47 @@ let usesCustomInserter = 1 in {
}
let mayLoad = 1 in {
-def LDREXB : AIldrex<0b10, (outs GPR:$Rt), (ins addrmode7:$addr), NoItinerary,
+def LDREXB : AIldrex<0b10, (outs GPR:$Rt), (ins addr_offset_none:$addr),
+ NoItinerary,
"ldrexb", "\t$Rt, $addr", []>;
-def LDREXH : AIldrex<0b11, (outs GPR:$Rt), (ins addrmode7:$addr), NoItinerary,
- "ldrexh", "\t$Rt, $addr", []>;
-def LDREX : AIldrex<0b00, (outs GPR:$Rt), (ins addrmode7:$addr), NoItinerary,
- "ldrex", "\t$Rt, $addr", []>;
+def LDREXH : AIldrex<0b11, (outs GPR:$Rt), (ins addr_offset_none:$addr),
+ NoItinerary, "ldrexh", "\t$Rt, $addr", []>;
+def LDREX : AIldrex<0b00, (outs GPR:$Rt), (ins addr_offset_none:$addr),
+ NoItinerary, "ldrex", "\t$Rt, $addr", []>;
let hasExtraDefRegAllocReq = 1 in
- def LDREXD : AIldrex<0b01, (outs GPR:$Rt, GPR:$Rt2), (ins addrmode7:$addr),
- NoItinerary, "ldrexd", "\t$Rt, $Rt2, $addr", []>;
+def LDREXD: AIldrex<0b01, (outs GPR:$Rt, GPR:$Rt2),(ins addr_offset_none:$addr),
+ NoItinerary, "ldrexd", "\t$Rt, $Rt2, $addr", []> {
+ let DecoderMethod = "DecodeDoubleRegLoad";
+}
}
let mayStore = 1, Constraints = "@earlyclobber $Rd" in {
-def STREXB : AIstrex<0b10, (outs GPR:$Rd), (ins GPR:$Rt, addrmode7:$addr),
+def STREXB: AIstrex<0b10, (outs GPR:$Rd), (ins GPR:$Rt, addr_offset_none:$addr),
NoItinerary, "strexb", "\t$Rd, $Rt, $addr", []>;
-def STREXH : AIstrex<0b11, (outs GPR:$Rd), (ins GPR:$Rt, addrmode7:$addr),
+def STREXH: AIstrex<0b11, (outs GPR:$Rd), (ins GPR:$Rt, addr_offset_none:$addr),
NoItinerary, "strexh", "\t$Rd, $Rt, $addr", []>;
-def STREX : AIstrex<0b00, (outs GPR:$Rd), (ins GPR:$Rt, addrmode7:$addr),
+def STREX : AIstrex<0b00, (outs GPR:$Rd), (ins GPR:$Rt, addr_offset_none:$addr),
NoItinerary, "strex", "\t$Rd, $Rt, $addr", []>;
}
let hasExtraSrcRegAllocReq = 1, Constraints = "@earlyclobber $Rd" in
def STREXD : AIstrex<0b01, (outs GPR:$Rd),
- (ins GPR:$Rt, GPR:$Rt2, addrmode7:$addr),
- NoItinerary, "strexd", "\t$Rd, $Rt, $Rt2, $addr", []>;
+ (ins GPR:$Rt, GPR:$Rt2, addr_offset_none:$addr),
+ NoItinerary, "strexd", "\t$Rd, $Rt, $Rt2, $addr", []> {
+ let DecoderMethod = "DecodeDoubleRegStore";
+}
-// Clear-Exclusive is for disassembly only.
-def CLREX : AXI<(outs), (ins), MiscFrm, NoItinerary, "clrex",
- [/* For disassembly only; pattern left blank */]>,
+def CLREX : AXI<(outs), (ins), MiscFrm, NoItinerary, "clrex", []>,
Requires<[IsARM, HasV7]> {
let Inst{31-0} = 0b11110101011111111111000000011111;
}
-// SWP/SWPB are deprecated in V6/V7 and for disassembly only.
-let mayLoad = 1 in {
-def SWP : AIswp<0, (outs GPR:$Rt), (ins GPR:$Rt2, GPR:$Rn), "swp",
- [/* For disassembly only; pattern left blank */]>;
-def SWPB : AIswp<1, (outs GPR:$Rt), (ins GPR:$Rt2, GPR:$Rn), "swpb",
- [/* For disassembly only; pattern left blank */]>;
+// SWP/SWPB are deprecated in V6/V7.
+let mayLoad = 1, mayStore = 1 in {
+def SWP : AIswp<0, (outs GPR:$Rt), (ins GPR:$Rt2, addr_offset_none:$addr),
+ "swp", []>;
+def SWPB: AIswp<1, (outs GPR:$Rt), (ins GPR:$Rt2, addr_offset_none:$addr),
+ "swpb", []>;
}
//===----------------------------------------------------------------------===//
@@ -3526,108 +4256,171 @@ def CDP2 : ABXI<0b1110, (outs), (ins p_imm:$cop, imm0_15:$opc1,
class ACI<dag oops, dag iops, string opc, string asm,
IndexMode im = IndexModeNone>
+ : I<oops, iops, AddrModeNone, 4, im, BrFrm, NoItinerary,
+ opc, asm, "", []> {
+ let Inst{27-25} = 0b110;
+}
+class ACInoP<dag oops, dag iops, string opc, string asm,
+ IndexMode im = IndexModeNone>
: InoP<oops, iops, AddrModeNone, 4, im, BrFrm, NoItinerary,
- opc, asm, "", [/* For disassembly only; pattern left blank */]> {
+ opc, asm, "", []> {
+ let Inst{31-28} = 0b1111;
let Inst{27-25} = 0b110;
}
-
-multiclass LdStCop<bits<4> op31_28, bit load, dag ops, string opc, string cond>{
-
- def _OFFSET : ACI<(outs),
- !con((ins nohash_imm:$cop, nohash_imm:$CRd, addrmode2:$addr), ops),
- !strconcat(opc, cond), "\tp$cop, cr$CRd, $addr"> {
- let Inst{31-28} = op31_28;
+multiclass LdStCop<bit load, bit Dbit, string asm> {
+ def _OFFSET : ACI<(outs), (ins p_imm:$cop, c_imm:$CRd, addrmode5:$addr),
+ asm, "\t$cop, $CRd, $addr"> {
+ bits<13> addr;
+ bits<4> cop;
+ bits<4> CRd;
let Inst{24} = 1; // P = 1
+ let Inst{23} = addr{8};
+ let Inst{22} = Dbit;
let Inst{21} = 0; // W = 0
- let Inst{22} = 0; // D = 0
let Inst{20} = load;
+ let Inst{19-16} = addr{12-9};
+ let Inst{15-12} = CRd;
+ let Inst{11-8} = cop;
+ let Inst{7-0} = addr{7-0};
+ let DecoderMethod = "DecodeCopMemInstruction";
}
-
- def _PRE : ACI<(outs),
- !con((ins nohash_imm:$cop, nohash_imm:$CRd, addrmode2:$addr), ops),
- !strconcat(opc, cond), "\tp$cop, cr$CRd, $addr!", IndexModePre> {
- let Inst{31-28} = op31_28;
+ def _PRE : ACI<(outs), (ins p_imm:$cop, c_imm:$CRd, addrmode5:$addr),
+ asm, "\t$cop, $CRd, $addr!", IndexModePre> {
+ bits<13> addr;
+ bits<4> cop;
+ bits<4> CRd;
let Inst{24} = 1; // P = 1
+ let Inst{23} = addr{8};
+ let Inst{22} = Dbit;
let Inst{21} = 1; // W = 1
- let Inst{22} = 0; // D = 0
let Inst{20} = load;
+ let Inst{19-16} = addr{12-9};
+ let Inst{15-12} = CRd;
+ let Inst{11-8} = cop;
+ let Inst{7-0} = addr{7-0};
+ let DecoderMethod = "DecodeCopMemInstruction";
}
-
- def _POST : ACI<(outs),
- !con((ins nohash_imm:$cop, nohash_imm:$CRd, addrmode2:$addr), ops),
- !strconcat(opc, cond), "\tp$cop, cr$CRd, $addr", IndexModePost> {
- let Inst{31-28} = op31_28;
+ def _POST: ACI<(outs), (ins p_imm:$cop, c_imm:$CRd, addr_offset_none:$addr,
+ postidx_imm8s4:$offset),
+ asm, "\t$cop, $CRd, $addr, $offset", IndexModePost> {
+ bits<9> offset;
+ bits<4> addr;
+ bits<4> cop;
+ bits<4> CRd;
let Inst{24} = 0; // P = 0
+ let Inst{23} = offset{8};
+ let Inst{22} = Dbit;
let Inst{21} = 1; // W = 1
- let Inst{22} = 0; // D = 0
let Inst{20} = load;
+ let Inst{19-16} = addr;
+ let Inst{15-12} = CRd;
+ let Inst{11-8} = cop;
+ let Inst{7-0} = offset{7-0};
+ let DecoderMethod = "DecodeCopMemInstruction";
}
-
def _OPTION : ACI<(outs),
- !con((ins nohash_imm:$cop,nohash_imm:$CRd,GPR:$base, nohash_imm:$option),
- ops),
- !strconcat(opc, cond), "\tp$cop, cr$CRd, [$base], \\{$option\\}"> {
- let Inst{31-28} = op31_28;
+ (ins p_imm:$cop, c_imm:$CRd, addr_offset_none:$addr,
+ coproc_option_imm:$option),
+ asm, "\t$cop, $CRd, $addr, $option"> {
+ bits<8> option;
+ bits<4> addr;
+ bits<4> cop;
+ bits<4> CRd;
let Inst{24} = 0; // P = 0
let Inst{23} = 1; // U = 1
+ let Inst{22} = Dbit;
let Inst{21} = 0; // W = 0
- let Inst{22} = 0; // D = 0
let Inst{20} = load;
+ let Inst{19-16} = addr;
+ let Inst{15-12} = CRd;
+ let Inst{11-8} = cop;
+ let Inst{7-0} = option;
+ let DecoderMethod = "DecodeCopMemInstruction";
}
-
- def L_OFFSET : ACI<(outs),
- !con((ins nohash_imm:$cop, nohash_imm:$CRd, addrmode2:$addr), ops),
- !strconcat(!strconcat(opc, "l"), cond), "\tp$cop, cr$CRd, $addr"> {
- let Inst{31-28} = op31_28;
+}
+multiclass LdSt2Cop<bit load, bit Dbit, string asm> {
+ def _OFFSET : ACInoP<(outs), (ins p_imm:$cop, c_imm:$CRd, addrmode5:$addr),
+ asm, "\t$cop, $CRd, $addr"> {
+ bits<13> addr;
+ bits<4> cop;
+ bits<4> CRd;
let Inst{24} = 1; // P = 1
+ let Inst{23} = addr{8};
+ let Inst{22} = Dbit;
let Inst{21} = 0; // W = 0
- let Inst{22} = 1; // D = 1
let Inst{20} = load;
+ let Inst{19-16} = addr{12-9};
+ let Inst{15-12} = CRd;
+ let Inst{11-8} = cop;
+ let Inst{7-0} = addr{7-0};
+ let DecoderMethod = "DecodeCopMemInstruction";
}
-
- def L_PRE : ACI<(outs),
- !con((ins nohash_imm:$cop, nohash_imm:$CRd, addrmode2:$addr), ops),
- !strconcat(!strconcat(opc, "l"), cond), "\tp$cop, cr$CRd, $addr!",
- IndexModePre> {
- let Inst{31-28} = op31_28;
+ def _PRE : ACInoP<(outs), (ins p_imm:$cop, c_imm:$CRd, addrmode5:$addr),
+ asm, "\t$cop, $CRd, $addr!", IndexModePre> {
+ bits<13> addr;
+ bits<4> cop;
+ bits<4> CRd;
let Inst{24} = 1; // P = 1
+ let Inst{23} = addr{8};
+ let Inst{22} = Dbit;
let Inst{21} = 1; // W = 1
- let Inst{22} = 1; // D = 1
let Inst{20} = load;
+ let Inst{19-16} = addr{12-9};
+ let Inst{15-12} = CRd;
+ let Inst{11-8} = cop;
+ let Inst{7-0} = addr{7-0};
+ let DecoderMethod = "DecodeCopMemInstruction";
}
-
- def L_POST : ACI<(outs),
- !con((ins nohash_imm:$cop, nohash_imm:$CRd, addrmode2:$addr), ops),
- !strconcat(!strconcat(opc, "l"), cond), "\tp$cop, cr$CRd, $addr",
- IndexModePost> {
- let Inst{31-28} = op31_28;
+ def _POST: ACInoP<(outs), (ins p_imm:$cop, c_imm:$CRd, addr_offset_none:$addr,
+ postidx_imm8s4:$offset),
+ asm, "\t$cop, $CRd, $addr, $offset", IndexModePost> {
+ bits<9> offset;
+ bits<4> addr;
+ bits<4> cop;
+ bits<4> CRd;
let Inst{24} = 0; // P = 0
+ let Inst{23} = offset{8};
+ let Inst{22} = Dbit;
let Inst{21} = 1; // W = 1
- let Inst{22} = 1; // D = 1
let Inst{20} = load;
+ let Inst{19-16} = addr;
+ let Inst{15-12} = CRd;
+ let Inst{11-8} = cop;
+ let Inst{7-0} = offset{7-0};
+ let DecoderMethod = "DecodeCopMemInstruction";
}
-
- def L_OPTION : ACI<(outs),
- !con((ins nohash_imm:$cop, nohash_imm:$CRd,GPR:$base,nohash_imm:$option),
- ops),
- !strconcat(!strconcat(opc, "l"), cond),
- "\tp$cop, cr$CRd, [$base], \\{$option\\}"> {
- let Inst{31-28} = op31_28;
+ def _OPTION : ACInoP<(outs),
+ (ins p_imm:$cop, c_imm:$CRd, addr_offset_none:$addr,
+ coproc_option_imm:$option),
+ asm, "\t$cop, $CRd, $addr, $option"> {
+ bits<8> option;
+ bits<4> addr;
+ bits<4> cop;
+ bits<4> CRd;
let Inst{24} = 0; // P = 0
let Inst{23} = 1; // U = 1
+ let Inst{22} = Dbit;
let Inst{21} = 0; // W = 0
- let Inst{22} = 1; // D = 1
let Inst{20} = load;
+ let Inst{19-16} = addr;
+ let Inst{15-12} = CRd;
+ let Inst{11-8} = cop;
+ let Inst{7-0} = option;
+ let DecoderMethod = "DecodeCopMemInstruction";
}
}
-defm LDC : LdStCop<{?,?,?,?}, 1, (ins pred:$p), "ldc", "${p}">;
-defm LDC2 : LdStCop<0b1111, 1, (ins), "ldc2", "">;
-defm STC : LdStCop<{?,?,?,?}, 0, (ins pred:$p), "stc", "${p}">;
-defm STC2 : LdStCop<0b1111, 0, (ins), "stc2", "">;
+defm LDC : LdStCop <1, 0, "ldc">;
+defm LDCL : LdStCop <1, 1, "ldcl">;
+defm STC : LdStCop <0, 0, "stc">;
+defm STCL : LdStCop <0, 1, "stcl">;
+defm LDC2 : LdSt2Cop<1, 0, "ldc2">;
+defm LDC2L : LdSt2Cop<1, 1, "ldc2l">;
+defm STC2 : LdSt2Cop<0, 0, "stc2">;
+defm STC2L : LdSt2Cop<0, 1, "stc2l">;
//===----------------------------------------------------------------------===//
-// Move between coprocessor and ARM core register -- for disassembly only
+// Move between coprocessor and ARM core register.
//
class MovRCopro<string opc, bit direction, dag oops, dag iops,
@@ -3660,8 +4453,8 @@ def MCR : MovRCopro<"mcr", 0 /* from ARM core register to coprocessor */,
imm:$CRm, imm:$opc2)]>;
def MRC : MovRCopro<"mrc", 1 /* from coprocessor to ARM core register */,
(outs GPR:$Rt),
- (ins p_imm:$cop, i32imm:$opc1, c_imm:$CRn, c_imm:$CRm,
- i32imm:$opc2), []>;
+ (ins p_imm:$cop, imm0_7:$opc1, c_imm:$CRn, c_imm:$CRm,
+ imm0_7:$opc2), []>;
def : ARMPat<(int_arm_mrc imm:$cop, imm:$opc1, imm:$CRn, imm:$CRm, imm:$opc2),
(MRC imm:$cop, imm:$opc1, imm:$CRn, imm:$CRm, imm:$opc2)>;
@@ -3697,15 +4490,14 @@ def MCR2 : MovRCopro2<"mcr2", 0 /* from ARM core register to coprocessor */,
imm:$CRm, imm:$opc2)]>;
def MRC2 : MovRCopro2<"mrc2", 1 /* from coprocessor to ARM core register */,
(outs GPR:$Rt),
- (ins p_imm:$cop, i32imm:$opc1, c_imm:$CRn, c_imm:$CRm,
- i32imm:$opc2), []>;
+ (ins p_imm:$cop, imm0_7:$opc1, c_imm:$CRn, c_imm:$CRm,
+ imm0_7:$opc2), []>;
def : ARMV5TPat<(int_arm_mrc2 imm:$cop, imm:$opc1, imm:$CRn,
imm:$CRm, imm:$opc2),
(MRC2 imm:$cop, imm:$opc1, imm:$CRn, imm:$CRm, imm:$opc2)>;
-class MovRRCopro<string opc, bit direction,
- list<dag> pattern = [/* For disassembly only */]>
+class MovRRCopro<string opc, bit direction, list<dag> pattern = []>
: ABI<0b1100, (outs), (ins p_imm:$cop, imm0_15:$opc1,
GPR:$Rt, GPR:$Rt2, c_imm:$CRm),
NoItinerary, opc, "\t$cop, $opc1, $Rt, $Rt2, $CRm", pattern> {
@@ -3730,8 +4522,7 @@ def MCRR : MovRRCopro<"mcrr", 0 /* from ARM core register to coprocessor */,
imm:$CRm)]>;
def MRRC : MovRRCopro<"mrrc", 1 /* from coprocessor to ARM core register */>;
-class MovRRCopro2<string opc, bit direction,
- list<dag> pattern = [/* For disassembly only */]>
+class MovRRCopro2<string opc, bit direction, list<dag> pattern = []>
: ABXI<0b1100, (outs), (ins p_imm:$cop, imm0_15:$opc1,
GPR:$Rt, GPR:$Rt2, c_imm:$CRm), NoItinerary,
!strconcat(opc, "\t$cop, $opc1, $Rt, $Rt2, $CRm"), pattern> {
@@ -3758,20 +4549,22 @@ def MCRR2 : MovRRCopro2<"mcrr2", 0 /* from ARM core register to coprocessor */,
def MRRC2 : MovRRCopro2<"mrrc2", 1 /* from coprocessor to ARM core register */>;
//===----------------------------------------------------------------------===//
-// Move between special register and ARM core register -- for disassembly only
+// Move between special register and ARM core register
//
// Move to ARM core register from Special Register
-def MRS : ABI<0b0001, (outs GPR:$Rd), (ins), NoItinerary, "mrs", "\t$Rd, cpsr",
- [/* For disassembly only; pattern left blank */]> {
+def MRS : ABI<0b0001, (outs GPR:$Rd), (ins), NoItinerary,
+ "mrs", "\t$Rd, apsr", []> {
bits<4> Rd;
let Inst{23-16} = 0b00001111;
let Inst{15-12} = Rd;
let Inst{7-4} = 0b0000;
}
-def MRSsys : ABI<0b0001, (outs GPR:$Rd), (ins), NoItinerary,"mrs","\t$Rd, spsr",
- [/* For disassembly only; pattern left blank */]> {
+def : InstAlias<"mrs${p} $Rd, cpsr", (MRS GPR:$Rd, pred:$p)>, Requires<[IsARM]>;
+
+def MRSsys : ABI<0b0001, (outs GPR:$Rd), (ins), NoItinerary,
+ "mrs", "\t$Rd, spsr", []> {
bits<4> Rd;
let Inst{23-16} = 0b01001111;
let Inst{15-12} = Rd;
@@ -3785,8 +4578,7 @@ def MRSsys : ABI<0b0001, (outs GPR:$Rd), (ins), NoItinerary,"mrs","\t$Rd, spsr",
// operand contains the special register (R Bit) in bit 4 and bits 3-0 contains
// the mask with the fields to be accessed in the special register.
def MSR : ABI<0b0001, (outs), (ins msr_mask:$mask, GPR:$Rn), NoItinerary,
- "msr", "\t$mask, $Rn",
- [/* For disassembly only; pattern left blank */]> {
+ "msr", "\t$mask, $Rn", []> {
bits<5> mask;
bits<4> Rn;
@@ -3800,8 +4592,7 @@ def MSR : ABI<0b0001, (outs), (ins msr_mask:$mask, GPR:$Rn), NoItinerary,
}
def MSRi : ABI<0b0011, (outs), (ins msr_mask:$mask, so_imm:$a), NoItinerary,
- "msr", "\t$mask, $a",
- [/* For disassembly only; pattern left blank */]> {
+ "msr", "\t$mask, $a", []> {
bits<5> mask;
bits<12> a;
@@ -4030,6 +4821,47 @@ def : ARMV5TEPat<(add GPR:$acc,
def : ARMPat<(ARMMemBarrierMCR GPR:$zero), (MCR 15, 0, GPR:$zero, 7, 10, 5)>,
Requires<[IsARM, HasV6]>;
+// SXT/UXT with no rotate
+let AddedComplexity = 16 in {
+def : ARMV6Pat<(and GPR:$Src, 0x000000FF), (UXTB GPR:$Src, 0)>;
+def : ARMV6Pat<(and GPR:$Src, 0x0000FFFF), (UXTH GPR:$Src, 0)>;
+def : ARMV6Pat<(and GPR:$Src, 0x00FF00FF), (UXTB16 GPR:$Src, 0)>;
+def : ARMV6Pat<(add GPR:$Rn, (and GPR:$Rm, 0x00FF)),
+ (UXTAB GPR:$Rn, GPR:$Rm, 0)>;
+def : ARMV6Pat<(add GPR:$Rn, (and GPR:$Rm, 0xFFFF)),
+ (UXTAH GPR:$Rn, GPR:$Rm, 0)>;
+}
+
+def : ARMV6Pat<(sext_inreg GPR:$Src, i8), (SXTB GPR:$Src, 0)>;
+def : ARMV6Pat<(sext_inreg GPR:$Src, i16), (SXTH GPR:$Src, 0)>;
+
+def : ARMV6Pat<(add GPR:$Rn, (sext_inreg GPRnopc:$Rm, i8)),
+ (SXTAB GPR:$Rn, GPRnopc:$Rm, 0)>;
+def : ARMV6Pat<(add GPR:$Rn, (sext_inreg GPRnopc:$Rm, i16)),
+ (SXTAH GPR:$Rn, GPRnopc:$Rm, 0)>;
+
+// Atomic load/store patterns
+def : ARMPat<(atomic_load_8 ldst_so_reg:$src),
+ (LDRBrs ldst_so_reg:$src)>;
+def : ARMPat<(atomic_load_8 addrmode_imm12:$src),
+ (LDRBi12 addrmode_imm12:$src)>;
+def : ARMPat<(atomic_load_16 addrmode3:$src),
+ (LDRH addrmode3:$src)>;
+def : ARMPat<(atomic_load_32 ldst_so_reg:$src),
+ (LDRrs ldst_so_reg:$src)>;
+def : ARMPat<(atomic_load_32 addrmode_imm12:$src),
+ (LDRi12 addrmode_imm12:$src)>;
+def : ARMPat<(atomic_store_8 ldst_so_reg:$ptr, GPR:$val),
+ (STRBrs GPR:$val, ldst_so_reg:$ptr)>;
+def : ARMPat<(atomic_store_8 addrmode_imm12:$ptr, GPR:$val),
+ (STRBi12 GPR:$val, addrmode_imm12:$ptr)>;
+def : ARMPat<(atomic_store_16 addrmode3:$ptr, GPR:$val),
+ (STRH GPR:$val, addrmode3:$ptr)>;
+def : ARMPat<(atomic_store_32 ldst_so_reg:$ptr, GPR:$val),
+ (STRrs GPR:$val, ldst_so_reg:$ptr)>;
+def : ARMPat<(atomic_store_32 addrmode_imm12:$ptr, GPR:$val),
+ (STRi12 GPR:$val, addrmode_imm12:$ptr)>;
+
//===----------------------------------------------------------------------===//
// Thumb Support
@@ -4070,7 +4902,103 @@ def : MnemonicAlias<"swi", "svc">;
// Load / Store Multiple
def : MnemonicAlias<"ldmfd", "ldm">;
def : MnemonicAlias<"ldmia", "ldm">;
+def : MnemonicAlias<"ldmea", "ldmdb">;
def : MnemonicAlias<"stmfd", "stmdb">;
def : MnemonicAlias<"stmia", "stm">;
def : MnemonicAlias<"stmea", "stm">;
+// PKHBT/PKHTB with default shift amount. PKHTB is equivalent to PKHBT when the
+// shift amount is zero (i.e., unspecified).
+def : InstAlias<"pkhbt${p} $Rd, $Rn, $Rm",
+ (PKHBT GPRnopc:$Rd, GPRnopc:$Rn, GPRnopc:$Rm, 0, pred:$p)>,
+ Requires<[IsARM, HasV6]>;
+def : InstAlias<"pkhtb${p} $Rd, $Rn, $Rm",
+ (PKHBT GPRnopc:$Rd, GPRnopc:$Rn, GPRnopc:$Rm, 0, pred:$p)>,
+ Requires<[IsARM, HasV6]>;
+
+// PUSH/POP aliases for STM/LDM
+def : ARMInstAlias<"push${p} $regs", (STMDB_UPD SP, pred:$p, reglist:$regs)>;
+def : ARMInstAlias<"pop${p} $regs", (LDMIA_UPD SP, pred:$p, reglist:$regs)>;
+
+// SSAT/USAT optional shift operand.
+def : ARMInstAlias<"ssat${p} $Rd, $sat_imm, $Rn",
+ (SSAT GPRnopc:$Rd, imm1_32:$sat_imm, GPRnopc:$Rn, 0, pred:$p)>;
+def : ARMInstAlias<"usat${p} $Rd, $sat_imm, $Rn",
+ (USAT GPRnopc:$Rd, imm0_31:$sat_imm, GPRnopc:$Rn, 0, pred:$p)>;
+
+
+// Extend instruction optional rotate operand.
+def : ARMInstAlias<"sxtab${p} $Rd, $Rn, $Rm",
+ (SXTAB GPRnopc:$Rd, GPR:$Rn, GPRnopc:$Rm, 0, pred:$p)>;
+def : ARMInstAlias<"sxtah${p} $Rd, $Rn, $Rm",
+ (SXTAH GPRnopc:$Rd, GPR:$Rn, GPRnopc:$Rm, 0, pred:$p)>;
+def : ARMInstAlias<"sxtab16${p} $Rd, $Rn, $Rm",
+ (SXTAB16 GPRnopc:$Rd, GPR:$Rn, GPRnopc:$Rm, 0, pred:$p)>;
+def : ARMInstAlias<"sxtb${p} $Rd, $Rm",
+ (SXTB GPRnopc:$Rd, GPRnopc:$Rm, 0, pred:$p)>;
+def : ARMInstAlias<"sxtb16${p} $Rd, $Rm",
+ (SXTB16 GPRnopc:$Rd, GPRnopc:$Rm, 0, pred:$p)>;
+def : ARMInstAlias<"sxth${p} $Rd, $Rm",
+ (SXTH GPRnopc:$Rd, GPRnopc:$Rm, 0, pred:$p)>;
+
+def : ARMInstAlias<"uxtab${p} $Rd, $Rn, $Rm",
+ (UXTAB GPRnopc:$Rd, GPR:$Rn, GPRnopc:$Rm, 0, pred:$p)>;
+def : ARMInstAlias<"uxtah${p} $Rd, $Rn, $Rm",
+ (UXTAH GPRnopc:$Rd, GPR:$Rn, GPRnopc:$Rm, 0, pred:$p)>;
+def : ARMInstAlias<"uxtab16${p} $Rd, $Rn, $Rm",
+ (UXTAB16 GPRnopc:$Rd, GPR:$Rn, GPRnopc:$Rm, 0, pred:$p)>;
+def : ARMInstAlias<"uxtb${p} $Rd, $Rm",
+ (UXTB GPRnopc:$Rd, GPRnopc:$Rm, 0, pred:$p)>;
+def : ARMInstAlias<"uxtb16${p} $Rd, $Rm",
+ (UXTB16 GPRnopc:$Rd, GPRnopc:$Rm, 0, pred:$p)>;
+def : ARMInstAlias<"uxth${p} $Rd, $Rm",
+ (UXTH GPRnopc:$Rd, GPRnopc:$Rm, 0, pred:$p)>;
+
+
+// RFE aliases
+def : MnemonicAlias<"rfefa", "rfeda">;
+def : MnemonicAlias<"rfeea", "rfedb">;
+def : MnemonicAlias<"rfefd", "rfeia">;
+def : MnemonicAlias<"rfeed", "rfeib">;
+def : MnemonicAlias<"rfe", "rfeia">;
+
+// SRS aliases
+def : MnemonicAlias<"srsfa", "srsda">;
+def : MnemonicAlias<"srsea", "srsdb">;
+def : MnemonicAlias<"srsfd", "srsia">;
+def : MnemonicAlias<"srsed", "srsib">;
+def : MnemonicAlias<"srs", "srsia">;
+
+// QSAX == QSUBADDX
+def : MnemonicAlias<"qsubaddx", "qsax">;
+// SASX == SADDSUBX
+def : MnemonicAlias<"saddsubx", "sasx">;
+// SHASX == SHADDSUBX
+def : MnemonicAlias<"shaddsubx", "shasx">;
+// SHSAX == SHSUBADDX
+def : MnemonicAlias<"shsubaddx", "shsax">;
+// SSAX == SSUBADDX
+def : MnemonicAlias<"ssubaddx", "ssax">;
+// UASX == UADDSUBX
+def : MnemonicAlias<"uaddsubx", "uasx">;
+// UHASX == UHADDSUBX
+def : MnemonicAlias<"uhaddsubx", "uhasx">;
+// UHSAX == UHSUBADDX
+def : MnemonicAlias<"uhsubaddx", "uhsax">;
+// UQASX == UQADDSUBX
+def : MnemonicAlias<"uqaddsubx", "uqasx">;
+// UQSAX == UQSUBADDX
+def : MnemonicAlias<"uqsubaddx", "uqsax">;
+// USAX == USUBADDX
+def : MnemonicAlias<"usubaddx", "usax">;
+
+// LDRSBT/LDRHT/LDRSHT post-index offset if optional.
+// Note that the write-back output register is a dummy operand for MC (it's
+// only meaningful for codegen), so we just pass zero here.
+// FIXME: tblgen not cooperating with argument conversions.
+//def : InstAlias<"ldrsbt${p} $Rt, $addr",
+// (LDRSBTi GPR:$Rt, GPR:$Rt, addr_offset_none:$addr, 0,pred:$p)>;
+//def : InstAlias<"ldrht${p} $Rt, $addr",
+// (LDRHTi GPR:$Rt, GPR:$Rt, addr_offset_none:$addr, 0, pred:$p)>;
+//def : InstAlias<"ldrsht${p} $Rt, $addr",
+// (LDRSHTi GPR:$Rt, GPR:$Rt, addr_offset_none:$addr, 0, pred:$p)>;
diff --git a/contrib/llvm/lib/Target/ARM/ARMInstrNEON.td b/contrib/llvm/lib/Target/ARM/ARMInstrNEON.td
index 0df62f4..7aad186 100644
--- a/contrib/llvm/lib/Target/ARM/ARMInstrNEON.td
+++ b/contrib/llvm/lib/Target/ARM/ARMInstrNEON.td
@@ -11,6 +11,35 @@
//
//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// NEON-specific Operands.
+//===----------------------------------------------------------------------===//
+def VectorIndex8Operand : AsmOperandClass { let Name = "VectorIndex8"; }
+def VectorIndex16Operand : AsmOperandClass { let Name = "VectorIndex16"; }
+def VectorIndex32Operand : AsmOperandClass { let Name = "VectorIndex32"; }
+def VectorIndex8 : Operand<i32>, ImmLeaf<i32, [{
+ return ((uint64_t)Imm) < 8;
+}]> {
+ let ParserMatchClass = VectorIndex8Operand;
+ let PrintMethod = "printVectorIndex";
+ let MIOperandInfo = (ops i32imm);
+}
+def VectorIndex16 : Operand<i32>, ImmLeaf<i32, [{
+ return ((uint64_t)Imm) < 4;
+}]> {
+ let ParserMatchClass = VectorIndex16Operand;
+ let PrintMethod = "printVectorIndex";
+ let MIOperandInfo = (ops i32imm);
+}
+def VectorIndex32 : Operand<i32>, ImmLeaf<i32, [{
+ return ((uint64_t)Imm) < 2;
+}]> {
+ let ParserMatchClass = VectorIndex32Operand;
+ let PrintMethod = "printVectorIndex";
+ let MIOperandInfo = (ops i32imm);
+}
+
//===----------------------------------------------------------------------===//
// NEON-specific DAG Nodes.
//===----------------------------------------------------------------------===//
@@ -175,7 +204,8 @@ class VLDQQWBPseudo<InstrItinClass itin>
(ins addrmode6:$addr, am6offset:$offset), itin,
"$addr.addr = $wb">;
class VLDQQQQPseudo<InstrItinClass itin>
- : PseudoNLdSt<(outs QQQQPR:$dst), (ins addrmode6:$addr, QQQQPR:$src),itin,"">;
+ : PseudoNLdSt<(outs QQQQPR:$dst), (ins addrmode6:$addr, QQQQPR:$src),itin,
+ "$src = $dst">;
class VLDQQQQWBPseudo<InstrItinClass itin>
: PseudoNLdSt<(outs QQQQPR:$dst, GPR:$wb),
(ins addrmode6:$addr, am6offset:$offset, QQQQPR:$src), itin,
@@ -190,6 +220,7 @@ class VLD1D<bits<4> op7_4, string Dt>
"vld1", Dt, "\\{$Vd\\}, $Rn", "", []> {
let Rm = 0b1111;
let Inst{4} = Rn{4};
+ let DecoderMethod = "DecodeVLDInstruction";
}
class VLD1Q<bits<4> op7_4, string Dt>
: NLdSt<0,0b10,0b1010,op7_4, (outs DPR:$Vd, DPR:$dst2),
@@ -197,6 +228,7 @@ class VLD1Q<bits<4> op7_4, string Dt>
"vld1", Dt, "\\{$Vd, $dst2\\}, $Rn", "", []> {
let Rm = 0b1111;
let Inst{5-4} = Rn{5-4};
+ let DecoderMethod = "DecodeVLDInstruction";
}
def VLD1d8 : VLD1D<{0,0,0,?}, "8">;
@@ -221,6 +253,7 @@ class VLD1DWB<bits<4> op7_4, string Dt>
"vld1", Dt, "\\{$Vd\\}, $Rn$Rm",
"$Rn.addr = $wb", []> {
let Inst{4} = Rn{4};
+ let DecoderMethod = "DecodeVLDInstruction";
}
class VLD1QWB<bits<4> op7_4, string Dt>
: NLdSt<0,0b10,0b1010,op7_4, (outs DPR:$Vd, DPR:$dst2, GPR:$wb),
@@ -228,6 +261,7 @@ class VLD1QWB<bits<4> op7_4, string Dt>
"vld1", Dt, "\\{$Vd, $dst2\\}, $Rn$Rm",
"$Rn.addr = $wb", []> {
let Inst{5-4} = Rn{5-4};
+ let DecoderMethod = "DecodeVLDInstruction";
}
def VLD1d8_UPD : VLD1DWB<{0,0,0,?}, "8">;
@@ -252,12 +286,14 @@ class VLD1D3<bits<4> op7_4, string Dt>
"\\{$Vd, $dst2, $dst3\\}, $Rn", "", []> {
let Rm = 0b1111;
let Inst{4} = Rn{4};
+ let DecoderMethod = "DecodeVLDInstruction";
}
class VLD1D3WB<bits<4> op7_4, string Dt>
: NLdSt<0,0b10,0b0110,op7_4, (outs DPR:$Vd, DPR:$dst2, DPR:$dst3, GPR:$wb),
(ins addrmode6:$Rn, am6offset:$Rm), IIC_VLD1x3u, "vld1", Dt,
"\\{$Vd, $dst2, $dst3\\}, $Rn$Rm", "$Rn.addr = $wb", []> {
let Inst{4} = Rn{4};
+ let DecoderMethod = "DecodeVLDInstruction";
}
def VLD1d8T : VLD1D3<{0,0,0,?}, "8">;
@@ -280,6 +316,7 @@ class VLD1D4<bits<4> op7_4, string Dt>
"\\{$Vd, $dst2, $dst3, $dst4\\}, $Rn", "", []> {
let Rm = 0b1111;
let Inst{5-4} = Rn{5-4};
+ let DecoderMethod = "DecodeVLDInstruction";
}
class VLD1D4WB<bits<4> op7_4, string Dt>
: NLdSt<0,0b10,0b0010,op7_4,
@@ -288,6 +325,7 @@ class VLD1D4WB<bits<4> op7_4, string Dt>
"\\{$Vd, $dst2, $dst3, $dst4\\}, $Rn$Rm", "$Rn.addr = $wb",
[]> {
let Inst{5-4} = Rn{5-4};
+ let DecoderMethod = "DecodeVLDInstruction";
}
def VLD1d8Q : VLD1D4<{0,0,?,?}, "8">;
@@ -310,6 +348,7 @@ class VLD2D<bits<4> op11_8, bits<4> op7_4, string Dt>
"vld2", Dt, "\\{$Vd, $dst2\\}, $Rn", "", []> {
let Rm = 0b1111;
let Inst{5-4} = Rn{5-4};
+ let DecoderMethod = "DecodeVLDInstruction";
}
class VLD2Q<bits<4> op7_4, string Dt>
: NLdSt<0, 0b10, 0b0011, op7_4,
@@ -318,6 +357,7 @@ class VLD2Q<bits<4> op7_4, string Dt>
"vld2", Dt, "\\{$Vd, $dst2, $dst3, $dst4\\}, $Rn", "", []> {
let Rm = 0b1111;
let Inst{5-4} = Rn{5-4};
+ let DecoderMethod = "DecodeVLDInstruction";
}
def VLD2d8 : VLD2D<0b1000, {0,0,?,?}, "8">;
@@ -343,6 +383,7 @@ class VLD2DWB<bits<4> op11_8, bits<4> op7_4, string Dt>
"vld2", Dt, "\\{$Vd, $dst2\\}, $Rn$Rm",
"$Rn.addr = $wb", []> {
let Inst{5-4} = Rn{5-4};
+ let DecoderMethod = "DecodeVLDInstruction";
}
class VLD2QWB<bits<4> op7_4, string Dt>
: NLdSt<0, 0b10, 0b0011, op7_4,
@@ -351,6 +392,7 @@ class VLD2QWB<bits<4> op7_4, string Dt>
"vld2", Dt, "\\{$Vd, $dst2, $dst3, $dst4\\}, $Rn$Rm",
"$Rn.addr = $wb", []> {
let Inst{5-4} = Rn{5-4};
+ let DecoderMethod = "DecodeVLDInstruction";
}
def VLD2d8_UPD : VLD2DWB<0b1000, {0,0,?,?}, "8">;
@@ -384,6 +426,7 @@ class VLD3D<bits<4> op11_8, bits<4> op7_4, string Dt>
"vld3", Dt, "\\{$Vd, $dst2, $dst3\\}, $Rn", "", []> {
let Rm = 0b1111;
let Inst{4} = Rn{4};
+ let DecoderMethod = "DecodeVLDInstruction";
}
def VLD3d8 : VLD3D<0b0100, {0,0,0,?}, "8">;
@@ -402,6 +445,7 @@ class VLD3DWB<bits<4> op11_8, bits<4> op7_4, string Dt>
"vld3", Dt, "\\{$Vd, $dst2, $dst3\\}, $Rn$Rm",
"$Rn.addr = $wb", []> {
let Inst{4} = Rn{4};
+ let DecoderMethod = "DecodeVLDInstruction";
}
def VLD3d8_UPD : VLD3DWB<0b0100, {0,0,0,?}, "8">;
@@ -441,6 +485,7 @@ class VLD4D<bits<4> op11_8, bits<4> op7_4, string Dt>
"vld4", Dt, "\\{$Vd, $dst2, $dst3, $dst4\\}, $Rn", "", []> {
let Rm = 0b1111;
let Inst{5-4} = Rn{5-4};
+ let DecoderMethod = "DecodeVLDInstruction";
}
def VLD4d8 : VLD4D<0b0000, {0,0,?,?}, "8">;
@@ -459,6 +504,7 @@ class VLD4DWB<bits<4> op11_8, bits<4> op7_4, string Dt>
"vld4", Dt, "\\{$Vd, $dst2, $dst3, $dst4\\}, $Rn$Rm",
"$Rn.addr = $wb", []> {
let Inst{5-4} = Rn{5-4};
+ let DecoderMethod = "DecodeVLDInstruction";
}
def VLD4d8_UPD : VLD4DWB<0b0000, {0,0,?,?}, "8">;
@@ -530,6 +576,7 @@ class VLD1LN<bits<4> op11_8, bits<4> op7_4, string Dt, ValueType Ty,
(i32 (LoadOp addrmode6:$Rn)),
imm:$lane))]> {
let Rm = 0b1111;
+ let DecoderMethod = "DecodeVLD1LN";
}
class VLD1LN32<bits<4> op11_8, bits<4> op7_4, string Dt, ValueType Ty,
PatFrag LoadOp>
@@ -541,6 +588,7 @@ class VLD1LN32<bits<4> op11_8, bits<4> op7_4, string Dt, ValueType Ty,
(i32 (LoadOp addrmode6oneL32:$Rn)),
imm:$lane))]> {
let Rm = 0b1111;
+ let DecoderMethod = "DecodeVLD1LN";
}
class VLD1QLNPseudo<ValueType Ty, PatFrag LoadOp> : VLDQLNPseudo<IIC_VLD1ln> {
let Pattern = [(set QPR:$dst, (vector_insert (Ty QPR:$src),
@@ -580,7 +628,9 @@ class VLD1LNWB<bits<4> op11_8, bits<4> op7_4, string Dt>
(ins addrmode6:$Rn, am6offset:$Rm,
DPR:$src, nohash_imm:$lane), IIC_VLD1lnu, "vld1", Dt,
"\\{$Vd[$lane]\\}, $Rn$Rm",
- "$src = $Vd, $Rn.addr = $wb", []>;
+ "$src = $Vd, $Rn.addr = $wb", []> {
+ let DecoderMethod = "DecodeVLD1LN";
+}
def VLD1LNd8_UPD : VLD1LNWB<0b0000, {?,?,?,0}, "8"> {
let Inst{7-5} = lane{2-0};
@@ -607,6 +657,7 @@ class VLD2LN<bits<4> op11_8, bits<4> op7_4, string Dt>
"$src1 = $Vd, $src2 = $dst2", []> {
let Rm = 0b1111;
let Inst{4} = Rn{4};
+ let DecoderMethod = "DecodeVLD2LN";
}
def VLD2LNd8 : VLD2LN<0b0001, {?,?,?,?}, "8"> {
@@ -642,6 +693,7 @@ class VLD2LNWB<bits<4> op11_8, bits<4> op7_4, string Dt>
"\\{$Vd[$lane], $dst2[$lane]\\}, $Rn$Rm",
"$src1 = $Vd, $src2 = $dst2, $Rn.addr = $wb", []> {
let Inst{4} = Rn{4};
+ let DecoderMethod = "DecodeVLD2LN";
}
def VLD2LNd8_UPD : VLD2LNWB<0b0001, {?,?,?,?}, "8"> {
@@ -676,6 +728,7 @@ class VLD3LN<bits<4> op11_8, bits<4> op7_4, string Dt>
"\\{$Vd[$lane], $dst2[$lane], $dst3[$lane]\\}, $Rn",
"$src1 = $Vd, $src2 = $dst2, $src3 = $dst3", []> {
let Rm = 0b1111;
+ let DecoderMethod = "DecodeVLD3LN";
}
def VLD3LNd8 : VLD3LN<0b0010, {?,?,?,0}, "8"> {
@@ -712,7 +765,9 @@ class VLD3LNWB<bits<4> op11_8, bits<4> op7_4, string Dt>
IIC_VLD3lnu, "vld3", Dt,
"\\{$Vd[$lane], $dst2[$lane], $dst3[$lane]\\}, $Rn$Rm",
"$src1 = $Vd, $src2 = $dst2, $src3 = $dst3, $Rn.addr = $wb",
- []>;
+ []> {
+ let DecoderMethod = "DecodeVLD3LN";
+}
def VLD3LNd8_UPD : VLD3LNWB<0b0010, {?,?,?,0}, "8"> {
let Inst{7-5} = lane{2-0};
@@ -748,6 +803,7 @@ class VLD4LN<bits<4> op11_8, bits<4> op7_4, string Dt>
"$src1 = $Vd, $src2 = $dst2, $src3 = $dst3, $src4 = $dst4", []> {
let Rm = 0b1111;
let Inst{4} = Rn{4};
+ let DecoderMethod = "DecodeVLD4LN";
}
def VLD4LNd8 : VLD4LN<0b0011, {?,?,?,?}, "8"> {
@@ -788,6 +844,7 @@ class VLD4LNWB<bits<4> op11_8, bits<4> op7_4, string Dt>
"$src1 = $Vd, $src2 = $dst2, $src3 = $dst3, $src4 = $dst4, $Rn.addr = $wb",
[]> {
let Inst{4} = Rn{4};
+ let DecoderMethod = "DecodeVLD4LN" ;
}
def VLD4LNd8_UPD : VLD4LNWB<0b0011, {?,?,?,?}, "8"> {
@@ -825,6 +882,7 @@ class VLD1DUP<bits<4> op7_4, string Dt, ValueType Ty, PatFrag LoadOp>
[(set DPR:$Vd, (Ty (NEONvdup (i32 (LoadOp addrmode6dup:$Rn)))))]> {
let Rm = 0b1111;
let Inst{4} = Rn{4};
+ let DecoderMethod = "DecodeVLD1DupInstruction";
}
class VLD1QDUPPseudo<ValueType Ty, PatFrag LoadOp> : VLDQPseudo<IIC_VLD1dup> {
let Pattern = [(set QPR:$dst,
@@ -852,6 +910,7 @@ class VLD1QDUP<bits<4> op7_4, string Dt>
"vld1", Dt, "\\{$Vd[], $dst2[]\\}, $Rn", "", []> {
let Rm = 0b1111;
let Inst{4} = Rn{4};
+ let DecoderMethod = "DecodeVLD1DupInstruction";
}
def VLD1DUPq8 : VLD1QDUP<{0,0,1,0}, "8">;
@@ -864,12 +923,14 @@ class VLD1DUPWB<bits<4> op7_4, string Dt>
(ins addrmode6dup:$Rn, am6offset:$Rm), IIC_VLD1dupu,
"vld1", Dt, "\\{$Vd[]\\}, $Rn$Rm", "$Rn.addr = $wb", []> {
let Inst{4} = Rn{4};
+ let DecoderMethod = "DecodeVLD1DupInstruction";
}
class VLD1QDUPWB<bits<4> op7_4, string Dt>
: NLdSt<1, 0b10, 0b1100, op7_4, (outs DPR:$Vd, DPR:$dst2, GPR:$wb),
(ins addrmode6dup:$Rn, am6offset:$Rm), IIC_VLD1dupu,
"vld1", Dt, "\\{$Vd[], $dst2[]\\}, $Rn$Rm", "$Rn.addr = $wb", []> {
let Inst{4} = Rn{4};
+ let DecoderMethod = "DecodeVLD1DupInstruction";
}
def VLD1DUPd8_UPD : VLD1DUPWB<{0,0,0,0}, "8">;
@@ -891,6 +952,7 @@ class VLD2DUP<bits<4> op7_4, string Dt>
"vld2", Dt, "\\{$Vd[], $dst2[]\\}, $Rn", "", []> {
let Rm = 0b1111;
let Inst{4} = Rn{4};
+ let DecoderMethod = "DecodeVLD2DupInstruction";
}
def VLD2DUPd8 : VLD2DUP<{0,0,0,?}, "8">;
@@ -912,6 +974,7 @@ class VLD2DUPWB<bits<4> op7_4, string Dt>
(ins addrmode6dup:$Rn, am6offset:$Rm), IIC_VLD2dupu,
"vld2", Dt, "\\{$Vd[], $dst2[]\\}, $Rn$Rm", "$Rn.addr = $wb", []> {
let Inst{4} = Rn{4};
+ let DecoderMethod = "DecodeVLD2DupInstruction";
}
def VLD2DUPd8_UPD : VLD2DUPWB<{0,0,0,0}, "8">;
@@ -932,7 +995,8 @@ class VLD3DUP<bits<4> op7_4, string Dt>
(ins addrmode6dup:$Rn), IIC_VLD3dup,
"vld3", Dt, "\\{$Vd[], $dst2[], $dst3[]\\}, $Rn", "", []> {
let Rm = 0b1111;
- let Inst{4} = Rn{4};
+ let Inst{4} = 0;
+ let DecoderMethod = "DecodeVLD3DupInstruction";
}
def VLD3DUPd8 : VLD3DUP<{0,0,0,?}, "8">;
@@ -954,7 +1018,8 @@ class VLD3DUPWB<bits<4> op7_4, string Dt>
(ins addrmode6dup:$Rn, am6offset:$Rm), IIC_VLD3dupu,
"vld3", Dt, "\\{$Vd[], $dst2[], $dst3[]\\}, $Rn$Rm",
"$Rn.addr = $wb", []> {
- let Inst{4} = Rn{4};
+ let Inst{4} = 0;
+ let DecoderMethod = "DecodeVLD3DupInstruction";
}
def VLD3DUPd8_UPD : VLD3DUPWB<{0,0,0,0}, "8">;
@@ -977,6 +1042,7 @@ class VLD4DUP<bits<4> op7_4, string Dt>
"vld4", Dt, "\\{$Vd[], $dst2[], $dst3[], $dst4[]\\}, $Rn", "", []> {
let Rm = 0b1111;
let Inst{4} = Rn{4};
+ let DecoderMethod = "DecodeVLD4DupInstruction";
}
def VLD4DUPd8 : VLD4DUP<{0,0,0,?}, "8">;
@@ -1000,6 +1066,7 @@ class VLD4DUPWB<bits<4> op7_4, string Dt>
"vld4", Dt, "\\{$Vd[], $dst2[], $dst3[], $dst4[]\\}, $Rn$Rm",
"$Rn.addr = $wb", []> {
let Inst{4} = Rn{4};
+ let DecoderMethod = "DecodeVLD4DupInstruction";
}
def VLD4DUPd8_UPD : VLD4DUPWB<{0,0,0,0}, "8">;
@@ -1045,6 +1112,7 @@ class VST1D<bits<4> op7_4, string Dt>
IIC_VST1, "vst1", Dt, "\\{$Vd\\}, $Rn", "", []> {
let Rm = 0b1111;
let Inst{4} = Rn{4};
+ let DecoderMethod = "DecodeVSTInstruction";
}
class VST1Q<bits<4> op7_4, string Dt>
: NLdSt<0,0b00,0b1010,op7_4, (outs),
@@ -1052,6 +1120,7 @@ class VST1Q<bits<4> op7_4, string Dt>
"vst1", Dt, "\\{$Vd, $src2\\}, $Rn", "", []> {
let Rm = 0b1111;
let Inst{5-4} = Rn{5-4};
+ let DecoderMethod = "DecodeVSTInstruction";
}
def VST1d8 : VST1D<{0,0,0,?}, "8">;
@@ -1075,6 +1144,7 @@ class VST1DWB<bits<4> op7_4, string Dt>
(ins addrmode6:$Rn, am6offset:$Rm, DPR:$Vd), IIC_VST1u,
"vst1", Dt, "\\{$Vd\\}, $Rn$Rm", "$Rn.addr = $wb", []> {
let Inst{4} = Rn{4};
+ let DecoderMethod = "DecodeVSTInstruction";
}
class VST1QWB<bits<4> op7_4, string Dt>
: NLdSt<0, 0b00, 0b1010, op7_4, (outs GPR:$wb),
@@ -1082,6 +1152,7 @@ class VST1QWB<bits<4> op7_4, string Dt>
IIC_VST1x2u, "vst1", Dt, "\\{$Vd, $src2\\}, $Rn$Rm",
"$Rn.addr = $wb", []> {
let Inst{5-4} = Rn{5-4};
+ let DecoderMethod = "DecodeVSTInstruction";
}
def VST1d8_UPD : VST1DWB<{0,0,0,?}, "8">;
@@ -1106,6 +1177,7 @@ class VST1D3<bits<4> op7_4, string Dt>
IIC_VST1x3, "vst1", Dt, "\\{$Vd, $src2, $src3\\}, $Rn", "", []> {
let Rm = 0b1111;
let Inst{4} = Rn{4};
+ let DecoderMethod = "DecodeVSTInstruction";
}
class VST1D3WB<bits<4> op7_4, string Dt>
: NLdSt<0, 0b00, 0b0110, op7_4, (outs GPR:$wb),
@@ -1114,6 +1186,7 @@ class VST1D3WB<bits<4> op7_4, string Dt>
IIC_VST1x3u, "vst1", Dt, "\\{$Vd, $src2, $src3\\}, $Rn$Rm",
"$Rn.addr = $wb", []> {
let Inst{4} = Rn{4};
+ let DecoderMethod = "DecodeVSTInstruction";
}
def VST1d8T : VST1D3<{0,0,0,?}, "8">;
@@ -1137,6 +1210,7 @@ class VST1D4<bits<4> op7_4, string Dt>
[]> {
let Rm = 0b1111;
let Inst{5-4} = Rn{5-4};
+ let DecoderMethod = "DecodeVSTInstruction";
}
class VST1D4WB<bits<4> op7_4, string Dt>
: NLdSt<0, 0b00, 0b0010, op7_4, (outs GPR:$wb),
@@ -1145,6 +1219,7 @@ class VST1D4WB<bits<4> op7_4, string Dt>
"vst1", Dt, "\\{$Vd, $src2, $src3, $src4\\}, $Rn$Rm",
"$Rn.addr = $wb", []> {
let Inst{5-4} = Rn{5-4};
+ let DecoderMethod = "DecodeVSTInstruction";
}
def VST1d8Q : VST1D4<{0,0,?,?}, "8">;
@@ -1167,6 +1242,7 @@ class VST2D<bits<4> op11_8, bits<4> op7_4, string Dt>
IIC_VST2, "vst2", Dt, "\\{$Vd, $src2\\}, $Rn", "", []> {
let Rm = 0b1111;
let Inst{5-4} = Rn{5-4};
+ let DecoderMethod = "DecodeVSTInstruction";
}
class VST2Q<bits<4> op7_4, string Dt>
: NLdSt<0, 0b00, 0b0011, op7_4, (outs),
@@ -1175,6 +1251,7 @@ class VST2Q<bits<4> op7_4, string Dt>
"", []> {
let Rm = 0b1111;
let Inst{5-4} = Rn{5-4};
+ let DecoderMethod = "DecodeVSTInstruction";
}
def VST2d8 : VST2D<0b1000, {0,0,?,?}, "8">;
@@ -1200,6 +1277,7 @@ class VST2DWB<bits<4> op11_8, bits<4> op7_4, string Dt>
IIC_VST2u, "vst2", Dt, "\\{$Vd, $src2\\}, $Rn$Rm",
"$Rn.addr = $wb", []> {
let Inst{5-4} = Rn{5-4};
+ let DecoderMethod = "DecodeVSTInstruction";
}
class VST2QWB<bits<4> op7_4, string Dt>
: NLdSt<0, 0b00, 0b0011, op7_4, (outs GPR:$wb),
@@ -1208,6 +1286,7 @@ class VST2QWB<bits<4> op7_4, string Dt>
"vst2", Dt, "\\{$Vd, $src2, $src3, $src4\\}, $Rn$Rm",
"$Rn.addr = $wb", []> {
let Inst{5-4} = Rn{5-4};
+ let DecoderMethod = "DecodeVSTInstruction";
}
def VST2d8_UPD : VST2DWB<0b1000, {0,0,?,?}, "8">;
@@ -1241,6 +1320,7 @@ class VST3D<bits<4> op11_8, bits<4> op7_4, string Dt>
"vst3", Dt, "\\{$Vd, $src2, $src3\\}, $Rn", "", []> {
let Rm = 0b1111;
let Inst{4} = Rn{4};
+ let DecoderMethod = "DecodeVSTInstruction";
}
def VST3d8 : VST3D<0b0100, {0,0,0,?}, "8">;
@@ -1259,6 +1339,7 @@ class VST3DWB<bits<4> op11_8, bits<4> op7_4, string Dt>
"vst3", Dt, "\\{$Vd, $src2, $src3\\}, $Rn$Rm",
"$Rn.addr = $wb", []> {
let Inst{4} = Rn{4};
+ let DecoderMethod = "DecodeVSTInstruction";
}
def VST3d8_UPD : VST3DWB<0b0100, {0,0,0,?}, "8">;
@@ -1298,6 +1379,7 @@ class VST4D<bits<4> op11_8, bits<4> op7_4, string Dt>
"", []> {
let Rm = 0b1111;
let Inst{5-4} = Rn{5-4};
+ let DecoderMethod = "DecodeVSTInstruction";
}
def VST4d8 : VST4D<0b0000, {0,0,?,?}, "8">;
@@ -1316,6 +1398,7 @@ class VST4DWB<bits<4> op11_8, bits<4> op7_4, string Dt>
"vst4", Dt, "\\{$Vd, $src2, $src3, $src4\\}, $Rn$Rm",
"$Rn.addr = $wb", []> {
let Inst{5-4} = Rn{5-4};
+ let DecoderMethod = "DecodeVSTInstruction";
}
def VST4d8_UPD : VST4DWB<0b0000, {0,0,?,?}, "8">;
@@ -1381,6 +1464,7 @@ class VST1LN<bits<4> op11_8, bits<4> op7_4, string Dt, ValueType Ty,
IIC_VST1ln, "vst1", Dt, "\\{$Vd[$lane]\\}, $Rn", "",
[(StoreOp (ExtractOp (Ty DPR:$Vd), imm:$lane), addrmode6:$Rn)]> {
let Rm = 0b1111;
+ let DecoderMethod = "DecodeVST1LN";
}
class VST1LN32<bits<4> op11_8, bits<4> op7_4, string Dt, ValueType Ty,
PatFrag StoreOp, SDNode ExtractOp>
@@ -1389,6 +1473,7 @@ class VST1LN32<bits<4> op11_8, bits<4> op7_4, string Dt, ValueType Ty,
IIC_VST1ln, "vst1", Dt, "\\{$Vd[$lane]\\}, $Rn", "",
[(StoreOp (ExtractOp (Ty DPR:$Vd), imm:$lane), addrmode6oneL32:$Rn)]>{
let Rm = 0b1111;
+ let DecoderMethod = "DecodeVST1LN";
}
class VST1QLNPseudo<ValueType Ty, PatFrag StoreOp, SDNode ExtractOp>
: VSTQLNPseudo<IIC_VST1ln> {
@@ -1429,7 +1514,9 @@ class VST1LNWB<bits<4> op11_8, bits<4> op7_4, string Dt, ValueType Ty,
"\\{$Vd[$lane]\\}, $Rn$Rm",
"$Rn.addr = $wb",
[(set GPR:$wb, (StoreOp (ExtractOp (Ty DPR:$Vd), imm:$lane),
- addrmode6:$Rn, am6offset:$Rm))]>;
+ addrmode6:$Rn, am6offset:$Rm))]> {
+ let DecoderMethod = "DecodeVST1LN";
+}
class VST1QLNWBPseudo<ValueType Ty, PatFrag StoreOp, SDNode ExtractOp>
: VSTQLNWBPseudo<IIC_VST1lnu> {
let Pattern = [(set GPR:$wb, (StoreOp (ExtractOp (Ty QPR:$src), imm:$lane),
@@ -1465,6 +1552,7 @@ class VST2LN<bits<4> op11_8, bits<4> op7_4, string Dt>
"", []> {
let Rm = 0b1111;
let Inst{4} = Rn{4};
+ let DecoderMethod = "DecodeVST2LN";
}
def VST2LNd8 : VST2LN<0b0001, {?,?,?,?}, "8"> {
@@ -1502,6 +1590,7 @@ class VST2LNWB<bits<4> op11_8, bits<4> op7_4, string Dt>
"\\{$src1[$lane], $src2[$lane]\\}, $addr$offset",
"$addr.addr = $wb", []> {
let Inst{4} = Rn{4};
+ let DecoderMethod = "DecodeVST2LN";
}
def VST2LNd8_UPD : VST2LNWB<0b0001, {?,?,?,?}, "8"> {
@@ -1535,6 +1624,7 @@ class VST3LN<bits<4> op11_8, bits<4> op7_4, string Dt>
nohash_imm:$lane), IIC_VST3ln, "vst3", Dt,
"\\{$Vd[$lane], $src2[$lane], $src3[$lane]\\}, $Rn", "", []> {
let Rm = 0b1111;
+ let DecoderMethod = "DecodeVST3LN";
}
def VST3LNd8 : VST3LN<0b0010, {?,?,?,0}, "8"> {
@@ -1569,7 +1659,9 @@ class VST3LNWB<bits<4> op11_8, bits<4> op7_4, string Dt>
DPR:$Vd, DPR:$src2, DPR:$src3, nohash_imm:$lane),
IIC_VST3lnu, "vst3", Dt,
"\\{$Vd[$lane], $src2[$lane], $src3[$lane]\\}, $Rn$Rm",
- "$Rn.addr = $wb", []>;
+ "$Rn.addr = $wb", []> {
+ let DecoderMethod = "DecodeVST3LN";
+}
def VST3LNd8_UPD : VST3LNWB<0b0010, {?,?,?,0}, "8"> {
let Inst{7-5} = lane{2-0};
@@ -1604,6 +1696,7 @@ class VST4LN<bits<4> op11_8, bits<4> op7_4, string Dt>
"", []> {
let Rm = 0b1111;
let Inst{4} = Rn{4};
+ let DecoderMethod = "DecodeVST4LN";
}
def VST4LNd8 : VST4LN<0b0011, {?,?,?,?}, "8"> {
@@ -1642,6 +1735,7 @@ class VST4LNWB<bits<4> op11_8, bits<4> op7_4, string Dt>
"\\{$Vd[$lane], $src2[$lane], $src3[$lane], $src4[$lane]\\}, $Rn$Rm",
"$Rn.addr = $wb", []> {
let Inst{4} = Rn{4};
+ let DecoderMethod = "DecodeVST4LN";
}
def VST4LNd8_UPD : VST4LNWB<0b0011, {?,?,?,?}, "8"> {
@@ -4039,6 +4133,7 @@ class N2VLShMax<bit op24, bit op23, bits<6> op21_16, bits<4> op11_8, bit op7,
: N2VLSh<op24, op23, op11_8, op7, op6, op4, OpcodeStr, Dt,
ResTy, OpTy, OpNode> {
let Inst{21-16} = op21_16;
+ let DecoderMethod = "DecodeVSHLMaxInstruction";
}
def VSHLLi8 : N2VLShMax<1, 1, 0b110010, 0b0011, 0, 0, 0, "vshll", "i8",
v8i16, v8i8, NEONvshlli>;
@@ -4219,16 +4314,6 @@ def : InstAlias<"vmov${p} $Vd, $Vm",
def : InstAlias<"vmov${p} $Vd, $Vm",
(VORRq QPR:$Vd, QPR:$Vm, QPR:$Vm, pred:$p)>;
-let neverHasSideEffects = 1 in {
-// Pseudo vector move instructions for QQ and QQQQ registers. This should
-// be expanded after register allocation is completed.
-def VMOVQQ : PseudoInst<(outs QQPR:$dst), (ins QQPR:$src),
- NoItinerary, []>;
-
-def VMOVQQQQ : PseudoInst<(outs QQQQPR:$dst), (ins QQQQPR:$src),
- NoItinerary, []>;
-} // neverHasSideEffects
-
// VMOV : Vector Move (Immediate)
let isReMaterializable = 1 in {
@@ -4462,36 +4547,42 @@ def : Pat<(v4f32 (NEONvdup (f32 (bitconvert GPR:$R)))), (VDUP32q GPR:$R)>;
// VDUP : Vector Duplicate Lane (from scalar to all elements)
class VDUPLND<bits<4> op19_16, string OpcodeStr, string Dt,
- ValueType Ty>
- : NVDupLane<op19_16, 0, (outs DPR:$Vd), (ins DPR:$Vm, nohash_imm:$lane),
- IIC_VMOVD, OpcodeStr, Dt, "$Vd, $Vm[$lane]",
+ ValueType Ty, Operand IdxTy>
+ : NVDupLane<op19_16, 0, (outs DPR:$Vd), (ins DPR:$Vm, IdxTy:$lane),
+ IIC_VMOVD, OpcodeStr, Dt, "$Vd, $Vm$lane",
[(set DPR:$Vd, (Ty (NEONvduplane (Ty DPR:$Vm), imm:$lane)))]>;
class VDUPLNQ<bits<4> op19_16, string OpcodeStr, string Dt,
- ValueType ResTy, ValueType OpTy>
- : NVDupLane<op19_16, 1, (outs QPR:$Vd), (ins DPR:$Vm, nohash_imm:$lane),
- IIC_VMOVQ, OpcodeStr, Dt, "$Vd, $Vm[$lane]",
+ ValueType ResTy, ValueType OpTy, Operand IdxTy>
+ : NVDupLane<op19_16, 1, (outs QPR:$Vd), (ins DPR:$Vm, IdxTy:$lane),
+ IIC_VMOVQ, OpcodeStr, Dt, "$Vd, $Vm$lane",
[(set QPR:$Vd, (ResTy (NEONvduplane (OpTy DPR:$Vm),
- imm:$lane)))]>;
+ VectorIndex32:$lane)))]>;
// Inst{19-16} is partially specified depending on the element size.
-def VDUPLN8d : VDUPLND<{?,?,?,1}, "vdup", "8", v8i8> {
+def VDUPLN8d : VDUPLND<{?,?,?,1}, "vdup", "8", v8i8, VectorIndex8> {
+ bits<3> lane;
let Inst{19-17} = lane{2-0};
}
-def VDUPLN16d : VDUPLND<{?,?,1,0}, "vdup", "16", v4i16> {
+def VDUPLN16d : VDUPLND<{?,?,1,0}, "vdup", "16", v4i16, VectorIndex16> {
+ bits<2> lane;
let Inst{19-18} = lane{1-0};
}
-def VDUPLN32d : VDUPLND<{?,1,0,0}, "vdup", "32", v2i32> {
+def VDUPLN32d : VDUPLND<{?,1,0,0}, "vdup", "32", v2i32, VectorIndex32> {
+ bits<1> lane;
let Inst{19} = lane{0};
}
-def VDUPLN8q : VDUPLNQ<{?,?,?,1}, "vdup", "8", v16i8, v8i8> {
+def VDUPLN8q : VDUPLNQ<{?,?,?,1}, "vdup", "8", v16i8, v8i8, VectorIndex8> {
+ bits<3> lane;
let Inst{19-17} = lane{2-0};
}
-def VDUPLN16q : VDUPLNQ<{?,?,1,0}, "vdup", "16", v8i16, v4i16> {
+def VDUPLN16q : VDUPLNQ<{?,?,1,0}, "vdup", "16", v8i16, v4i16, VectorIndex16> {
+ bits<2> lane;
let Inst{19-18} = lane{1-0};
}
-def VDUPLN32q : VDUPLNQ<{?,1,0,0}, "vdup", "32", v4i32, v2i32> {
+def VDUPLN32q : VDUPLNQ<{?,1,0,0}, "vdup", "32", v4i32, v2i32, VectorIndex32> {
+ bits<1> lane;
let Inst{19} = lane{0};
}
@@ -4753,6 +4844,7 @@ def VZIPq32 : N2VQShuffle<0b10, 0b00011, IIC_VPERMQ3, "vzip", "32">;
// Vector Table Lookup and Table Extension.
// VTBL : Vector Table Lookup
+let DecoderMethod = "DecodeTBLInstruction" in {
def VTBL1
: N3V<1,1,0b11,0b1000,0,0, (outs DPR:$Vd),
(ins DPR:$Vn, DPR:$Vm), NVTBLFrm, IIC_VTB1,
@@ -4815,6 +4907,7 @@ def VTBX3Pseudo
def VTBX4Pseudo
: PseudoNeonI<(outs DPR:$dst), (ins DPR:$orig, QQPR:$tbl, DPR:$src),
IIC_VTBX4, "$orig = $dst", []>;
+} // DecoderMethod = "DecodeTBLInstruction"
//===----------------------------------------------------------------------===//
// NEON instructions for single-precision FP math
diff --git a/contrib/llvm/lib/Target/ARM/ARMInstrThumb.td b/contrib/llvm/lib/Target/ARM/ARMInstrThumb.td
index bfe83ec..cedb547 100644
--- a/contrib/llvm/lib/Target/ARM/ARMInstrThumb.td
+++ b/contrib/llvm/lib/Target/ARM/ARMInstrThumb.td
@@ -19,6 +19,19 @@ def ARMtcall : SDNode<"ARMISD::tCALL", SDT_ARMcall,
[SDNPHasChain, SDNPOptInGlue, SDNPOutGlue,
SDNPVariadic]>;
+def imm_sr_XFORM: SDNodeXForm<imm, [{
+ unsigned Imm = N->getZExtValue();
+ return CurDAG->getTargetConstant((Imm == 32 ? 0 : Imm), MVT::i32);
+}]>;
+def ThumbSRImmAsmOperand: AsmOperandClass { let Name = "ImmThumbSR"; }
+def imm_sr : Operand<i32>, PatLeaf<(imm), [{
+ uint64_t Imm = N->getZExtValue();
+ return Imm > 0 && Imm <= 32;
+}], imm_sr_XFORM> {
+ let PrintMethod = "printThumbSRImm";
+ let ParserMatchClass = ThumbSRImmAsmOperand;
+}
+
def imm_neg_XFORM : SDNodeXForm<imm, [{
return CurDAG->getTargetConstant(-(int)N->getZExtValue(), MVT::i32);
}]>;
@@ -30,10 +43,6 @@ def imm0_7_neg : PatLeaf<(i32 imm), [{
return (uint32_t)-N->getZExtValue() < 8;
}], imm_neg_XFORM>;
-def imm0_255_asmoperand : AsmOperandClass { let Name = "Imm0_255"; }
-def imm0_255 : Operand<i32>, ImmLeaf<i32, [{ return Imm >= 0 && Imm < 256; }]> {
- let ParserMatchClass = imm0_255_asmoperand;
-}
def imm0_255_comp : PatLeaf<(i32 imm), [{
return ~((uint32_t)N->getZExtValue()) < 256;
}]>;
@@ -69,8 +78,17 @@ def t_adrlabel : Operand<i32> {
}
// Scaled 4 immediate.
-def t_imm_s4 : Operand<i32> {
+def t_imm0_1020s4_asmoperand: AsmOperandClass { let Name = "Imm0_1020s4"; }
+def t_imm0_1020s4 : Operand<i32> {
+ let PrintMethod = "printThumbS4ImmOperand";
+ let ParserMatchClass = t_imm0_1020s4_asmoperand;
+ let OperandType = "OPERAND_IMMEDIATE";
+}
+
+def t_imm0_508s4_asmoperand: AsmOperandClass { let Name = "Imm0_508s4"; }
+def t_imm0_508s4 : Operand<i32> {
let PrintMethod = "printThumbS4ImmOperand";
+ let ParserMatchClass = t_imm0_508s4_asmoperand;
let OperandType = "OPERAND_IMMEDIATE";
}
@@ -79,113 +97,129 @@ def t_imm_s4 : Operand<i32> {
let OperandType = "OPERAND_PCREL" in {
def t_brtarget : Operand<OtherVT> {
let EncoderMethod = "getThumbBRTargetOpValue";
+ let DecoderMethod = "DecodeThumbBROperand";
}
def t_bcctarget : Operand<i32> {
let EncoderMethod = "getThumbBCCTargetOpValue";
+ let DecoderMethod = "DecodeThumbBCCTargetOperand";
}
def t_cbtarget : Operand<i32> {
let EncoderMethod = "getThumbCBTargetOpValue";
+ let DecoderMethod = "DecodeThumbCmpBROperand";
}
def t_bltarget : Operand<i32> {
let EncoderMethod = "getThumbBLTargetOpValue";
+ let DecoderMethod = "DecodeThumbBLTargetOperand";
}
def t_blxtarget : Operand<i32> {
let EncoderMethod = "getThumbBLXTargetOpValue";
+ let DecoderMethod = "DecodeThumbBLXOffset";
}
}
-def MemModeRegThumbAsmOperand : AsmOperandClass {
- let Name = "MemModeRegThumb";
- let SuperClasses = [];
-}
-
-def MemModeImmThumbAsmOperand : AsmOperandClass {
- let Name = "MemModeImmThumb";
- let SuperClasses = [];
-}
-
// t_addrmode_rr := reg + reg
//
+def t_addrmode_rr_asm_operand : AsmOperandClass { let Name = "MemThumbRR"; }
def t_addrmode_rr : Operand<i32>,
ComplexPattern<i32, 2, "SelectThumbAddrModeRR", []> {
let EncoderMethod = "getThumbAddrModeRegRegOpValue";
let PrintMethod = "printThumbAddrModeRROperand";
+ let DecoderMethod = "DecodeThumbAddrModeRR";
+ let ParserMatchClass = t_addrmode_rr_asm_operand;
let MIOperandInfo = (ops tGPR:$base, tGPR:$offsreg);
}
// t_addrmode_rrs := reg + reg
//
+// We use separate scaled versions because the Select* functions need
+// to explicitly check for a matching constant and return false here so that
+// the reg+imm forms will match instead. This is a horrible way to do that,
+// as it forces tight coupling between the methods, but it's how selectiondag
+// currently works.
def t_addrmode_rrs1 : Operand<i32>,
ComplexPattern<i32, 2, "SelectThumbAddrModeRI5S1", []> {
let EncoderMethod = "getThumbAddrModeRegRegOpValue";
let PrintMethod = "printThumbAddrModeRROperand";
+ let DecoderMethod = "DecodeThumbAddrModeRR";
+ let ParserMatchClass = t_addrmode_rr_asm_operand;
let MIOperandInfo = (ops tGPR:$base, tGPR:$offsreg);
- let ParserMatchClass = MemModeRegThumbAsmOperand;
}
def t_addrmode_rrs2 : Operand<i32>,
ComplexPattern<i32, 2, "SelectThumbAddrModeRI5S2", []> {
let EncoderMethod = "getThumbAddrModeRegRegOpValue";
+ let DecoderMethod = "DecodeThumbAddrModeRR";
let PrintMethod = "printThumbAddrModeRROperand";
+ let ParserMatchClass = t_addrmode_rr_asm_operand;
let MIOperandInfo = (ops tGPR:$base, tGPR:$offsreg);
- let ParserMatchClass = MemModeRegThumbAsmOperand;
}
def t_addrmode_rrs4 : Operand<i32>,
ComplexPattern<i32, 2, "SelectThumbAddrModeRI5S4", []> {
let EncoderMethod = "getThumbAddrModeRegRegOpValue";
+ let DecoderMethod = "DecodeThumbAddrModeRR";
let PrintMethod = "printThumbAddrModeRROperand";
+ let ParserMatchClass = t_addrmode_rr_asm_operand;
let MIOperandInfo = (ops tGPR:$base, tGPR:$offsreg);
- let ParserMatchClass = MemModeRegThumbAsmOperand;
}
// t_addrmode_is4 := reg + imm5 * 4
//
+def t_addrmode_is4_asm_operand : AsmOperandClass { let Name = "MemThumbRIs4"; }
def t_addrmode_is4 : Operand<i32>,
ComplexPattern<i32, 2, "SelectThumbAddrModeImm5S4", []> {
let EncoderMethod = "getAddrModeISOpValue";
+ let DecoderMethod = "DecodeThumbAddrModeIS";
let PrintMethod = "printThumbAddrModeImm5S4Operand";
+ let ParserMatchClass = t_addrmode_is4_asm_operand;
let MIOperandInfo = (ops tGPR:$base, i32imm:$offsimm);
- let ParserMatchClass = MemModeImmThumbAsmOperand;
}
// t_addrmode_is2 := reg + imm5 * 2
//
+def t_addrmode_is2_asm_operand : AsmOperandClass { let Name = "MemThumbRIs2"; }
def t_addrmode_is2 : Operand<i32>,
ComplexPattern<i32, 2, "SelectThumbAddrModeImm5S2", []> {
let EncoderMethod = "getAddrModeISOpValue";
+ let DecoderMethod = "DecodeThumbAddrModeIS";
let PrintMethod = "printThumbAddrModeImm5S2Operand";
+ let ParserMatchClass = t_addrmode_is2_asm_operand;
let MIOperandInfo = (ops tGPR:$base, i32imm:$offsimm);
- let ParserMatchClass = MemModeImmThumbAsmOperand;
}
// t_addrmode_is1 := reg + imm5
//
+def t_addrmode_is1_asm_operand : AsmOperandClass { let Name = "MemThumbRIs1"; }
def t_addrmode_is1 : Operand<i32>,
ComplexPattern<i32, 2, "SelectThumbAddrModeImm5S1", []> {
let EncoderMethod = "getAddrModeISOpValue";
+ let DecoderMethod = "DecodeThumbAddrModeIS";
let PrintMethod = "printThumbAddrModeImm5S1Operand";
+ let ParserMatchClass = t_addrmode_is1_asm_operand;
let MIOperandInfo = (ops tGPR:$base, i32imm:$offsimm);
- let ParserMatchClass = MemModeImmThumbAsmOperand;
}
// t_addrmode_sp := sp + imm8 * 4
//
+// FIXME: This really shouldn't have an explicit SP operand at all. It should
+// be implicit, just like in the instruction encoding itself.
+def t_addrmode_sp_asm_operand : AsmOperandClass { let Name = "MemThumbSPI"; }
def t_addrmode_sp : Operand<i32>,
ComplexPattern<i32, 2, "SelectThumbAddrModeSP", []> {
let EncoderMethod = "getAddrModeThumbSPOpValue";
+ let DecoderMethod = "DecodeThumbAddrModeSP";
let PrintMethod = "printThumbAddrModeSPOperand";
+ let ParserMatchClass = t_addrmode_sp_asm_operand;
let MIOperandInfo = (ops GPR:$base, i32imm:$offsimm);
- let ParserMatchClass = MemModeImmThumbAsmOperand;
}
// t_addrmode_pc := <label> => pc + imm8 * 4
//
def t_addrmode_pc : Operand<i32> {
let EncoderMethod = "getAddrModePCOpValue";
- let ParserMatchClass = MemModeImmThumbAsmOperand;
+ let DecoderMethod = "DecodeThumbAddrModePC";
}
//===----------------------------------------------------------------------===//
@@ -207,68 +241,52 @@ def tADJCALLSTACKDOWN :
Requires<[IsThumb, IsThumb1Only]>;
}
-// T1Disassembly - A simple class to make encoding some disassembly patterns
-// easier and less verbose.
-class T1Disassembly<bits<2> op1, bits<8> op2>
+class T1SystemEncoding<bits<8> opc>
: T1Encoding<0b101111> {
- let Inst{9-8} = op1;
- let Inst{7-0} = op2;
+ let Inst{9-8} = 0b11;
+ let Inst{7-0} = opc;
}
-def tNOP : T1pI<(outs), (ins), NoItinerary, "nop", "",
- [/* For disassembly only; pattern left blank */]>,
- T1Disassembly<0b11, 0x00>; // A8.6.110
+def tNOP : T1pI<(outs), (ins), NoItinerary, "nop", "", []>,
+ T1SystemEncoding<0x00>, // A8.6.110
+ Requires<[IsThumb2]>;
-def tYIELD : T1pI<(outs), (ins), NoItinerary, "yield", "",
- [/* For disassembly only; pattern left blank */]>,
- T1Disassembly<0b11, 0x10>; // A8.6.410
+def tYIELD : T1pI<(outs), (ins), NoItinerary, "yield", "", []>,
+ T1SystemEncoding<0x10>; // A8.6.410
-def tWFE : T1pI<(outs), (ins), NoItinerary, "wfe", "",
- [/* For disassembly only; pattern left blank */]>,
- T1Disassembly<0b11, 0x20>; // A8.6.408
+def tWFE : T1pI<(outs), (ins), NoItinerary, "wfe", "", []>,
+ T1SystemEncoding<0x20>; // A8.6.408
-def tWFI : T1pI<(outs), (ins), NoItinerary, "wfi", "",
- [/* For disassembly only; pattern left blank */]>,
- T1Disassembly<0b11, 0x30>; // A8.6.409
+def tWFI : T1pI<(outs), (ins), NoItinerary, "wfi", "", []>,
+ T1SystemEncoding<0x30>; // A8.6.409
-def tSEV : T1pI<(outs), (ins), NoItinerary, "sev", "",
- [/* For disassembly only; pattern left blank */]>,
- T1Disassembly<0b11, 0x40>; // A8.6.157
+def tSEV : T1pI<(outs), (ins), NoItinerary, "sev", "", []>,
+ T1SystemEncoding<0x40>; // A8.6.157
-// The i32imm operand $val can be used by a debugger to store more information
+// The imm operand $val can be used by a debugger to store more information
// about the breakpoint.
-def tBKPT : T1I<(outs), (ins i32imm:$val), NoItinerary, "bkpt\t$val",
- [/* For disassembly only; pattern left blank */]>,
- T1Disassembly<0b10, {?,?,?,?,?,?,?,?}> {
+def tBKPT : T1I<(outs), (ins imm0_255:$val), NoItinerary, "bkpt\t$val",
+ []>,
+ T1Encoding<0b101111> {
+ let Inst{9-8} = 0b10;
// A8.6.22
bits<8> val;
let Inst{7-0} = val;
}
-def tSETENDBE : T1I<(outs), (ins), NoItinerary, "setend\tbe",
- [/* For disassembly only; pattern left blank */]>,
- T1Encoding<0b101101> {
- // A8.6.156
- let Inst{9-5} = 0b10010;
- let Inst{4} = 1;
- let Inst{3} = 1; // Big-Endian
- let Inst{2-0} = 0b000;
-}
-
-def tSETENDLE : T1I<(outs), (ins), NoItinerary, "setend\tle",
- [/* For disassembly only; pattern left blank */]>,
- T1Encoding<0b101101> {
+def tSETEND : T1I<(outs), (ins setend_op:$end), NoItinerary, "setend\t$end",
+ []>, T1Encoding<0b101101> {
+ bits<1> end;
// A8.6.156
let Inst{9-5} = 0b10010;
let Inst{4} = 1;
- let Inst{3} = 0; // Little-Endian
+ let Inst{3} = end;
let Inst{2-0} = 0b000;
}
// Change Processor State is a system instruction -- for disassembly only.
def tCPS : T1I<(outs), (ins imod_op:$imod, iflags_op:$iflags),
- NoItinerary, "cps$imod $iflags",
- [/* For disassembly only; pattern left blank */]>,
+ NoItinerary, "cps$imod $iflags", []>,
T1Misc<0b0110011> {
// A8.6.38 & B6.1.1
bit imod;
@@ -277,6 +295,7 @@ def tCPS : T1I<(outs), (ins imod_op:$imod, iflags_op:$iflags),
let Inst{4} = imod;
let Inst{3} = 0;
let Inst{2-0} = iflags;
+ let DecoderMethod = "DecodeThumbCPS";
}
// For both thumb1 and thumb2.
@@ -290,70 +309,70 @@ def tPICADD : TIt<(outs GPR:$dst), (ins GPR:$lhs, pclabel:$cp), IIC_iALUr, "",
let Inst{2-0} = dst;
}
-// PC relative add (ADR).
-def tADDrPCi : T1I<(outs tGPR:$dst), (ins t_imm_s4:$rhs), IIC_iALUi,
- "add\t$dst, pc, $rhs", []>,
- T1Encoding<{1,0,1,0,0,?}> {
- // A6.2 & A8.6.10
- bits<3> dst;
- bits<8> rhs;
- let Inst{10-8} = dst;
- let Inst{7-0} = rhs;
-}
-
// ADD <Rd>, sp, #<imm8>
-// This is rematerializable, which is particularly useful for taking the
-// address of locals.
-let isReMaterializable = 1 in
-def tADDrSPi : T1I<(outs tGPR:$dst), (ins GPR:$sp, t_imm_s4:$rhs), IIC_iALUi,
- "add\t$dst, $sp, $rhs", []>,
+// FIXME: This should not be marked as having side effects, and it should be
+// rematerializable. Clearing the side effect bit causes miscompilations,
+// probably because the instruction can be moved around.
+def tADDrSPi : T1pI<(outs tGPR:$dst), (ins GPRsp:$sp, t_imm0_1020s4:$imm),
+ IIC_iALUi, "add", "\t$dst, $sp, $imm", []>,
T1Encoding<{1,0,1,0,1,?}> {
// A6.2 & A8.6.8
bits<3> dst;
- bits<8> rhs;
+ bits<8> imm;
let Inst{10-8} = dst;
- let Inst{7-0} = rhs;
+ let Inst{7-0} = imm;
+ let DecoderMethod = "DecodeThumbAddSpecialReg";
}
// ADD sp, sp, #<imm7>
-def tADDspi : TIt<(outs GPR:$dst), (ins GPR:$lhs, t_imm_s4:$rhs), IIC_iALUi,
- "add\t$dst, $rhs", []>,
+def tADDspi : T1pIt<(outs GPRsp:$Rdn), (ins GPRsp:$Rn, t_imm0_508s4:$imm),
+ IIC_iALUi, "add", "\t$Rdn, $imm", []>,
T1Misc<{0,0,0,0,0,?,?}> {
// A6.2.5 & A8.6.8
- bits<7> rhs;
- let Inst{6-0} = rhs;
+ bits<7> imm;
+ let Inst{6-0} = imm;
+ let DecoderMethod = "DecodeThumbAddSPImm";
}
// SUB sp, sp, #<imm7>
// FIXME: The encoding and the ASM string don't match up.
-def tSUBspi : TIt<(outs GPR:$dst), (ins GPR:$lhs, t_imm_s4:$rhs), IIC_iALUi,
- "sub\t$dst, $rhs", []>,
+def tSUBspi : T1pIt<(outs GPRsp:$Rdn), (ins GPRsp:$Rn, t_imm0_508s4:$imm),
+ IIC_iALUi, "sub", "\t$Rdn, $imm", []>,
T1Misc<{0,0,0,0,1,?,?}> {
// A6.2.5 & A8.6.214
- bits<7> rhs;
- let Inst{6-0} = rhs;
+ bits<7> imm;
+ let Inst{6-0} = imm;
+ let DecoderMethod = "DecodeThumbAddSPImm";
}
+// Can optionally specify SP as a three operand instruction.
+def : tInstAlias<"add${p} sp, sp, $imm",
+ (tADDspi SP, t_imm0_508s4:$imm, pred:$p)>;
+def : tInstAlias<"sub${p} sp, sp, $imm",
+ (tSUBspi SP, t_imm0_508s4:$imm, pred:$p)>;
+
// ADD <Rm>, sp
-def tADDrSP : TIt<(outs GPR:$dst), (ins GPR:$lhs, GPR:$rhs), IIC_iALUr,
- "add\t$dst, $rhs", []>,
+def tADDrSP : T1pIt<(outs GPR:$Rdn), (ins GPR:$Rn, GPRsp:$sp), IIC_iALUr,
+ "add", "\t$Rdn, $sp, $Rn", []>,
T1Special<{0,0,?,?}> {
// A8.6.9 Encoding T1
- bits<4> dst;
- let Inst{7} = dst{3};
+ bits<4> Rdn;
+ let Inst{7} = Rdn{3};
let Inst{6-3} = 0b1101;
- let Inst{2-0} = dst{2-0};
+ let Inst{2-0} = Rdn{2-0};
+ let DecoderMethod = "DecodeThumbAddSPReg";
}
// ADD sp, <Rm>
-def tADDspr : TIt<(outs GPR:$dst), (ins GPR:$lhs, GPR:$rhs), IIC_iALUr,
- "add\t$dst, $rhs", []>,
+def tADDspr : T1pIt<(outs GPRsp:$Rdn), (ins GPRsp:$Rn, GPR:$Rm), IIC_iALUr,
+ "add", "\t$Rdn, $Rm", []>,
T1Special<{0,0,?,?}> {
// A8.6.9 Encoding T2
- bits<4> dst;
+ bits<4> Rm;
let Inst{7} = 1;
- let Inst{6-3} = dst;
+ let Inst{6-3} = Rm;
let Inst{2-0} = 0b101;
+ let DecoderMethod = "DecodeThumbAddSPReg";
}
//===----------------------------------------------------------------------===//
@@ -390,11 +409,12 @@ let isCall = 1,
Uses = [SP] in {
// Also used for Thumb2
def tBL : TIx2<0b11110, 0b11, 1,
- (outs), (ins t_bltarget:$func, variable_ops), IIC_Br,
- "bl\t$func",
+ (outs), (ins pred:$p, t_bltarget:$func, variable_ops), IIC_Br,
+ "bl${p}\t$func",
[(ARMtcall tglobaladdr:$func)]>,
Requires<[IsThumb, IsNotDarwin]> {
- bits<21> func;
+ bits<22> func;
+ let Inst{26} = func{21};
let Inst{25-16} = func{20-11};
let Inst{13} = 1;
let Inst{11} = 1;
@@ -403,8 +423,8 @@ let isCall = 1,
// ARMv5T and above, also used for Thumb2
def tBLXi : TIx2<0b11110, 0b11, 0,
- (outs), (ins t_blxtarget:$func, variable_ops), IIC_Br,
- "blx\t$func",
+ (outs), (ins pred:$p, t_blxtarget:$func, variable_ops), IIC_Br,
+ "blx${p}\t$func",
[(ARMcall tglobaladdr:$func)]>,
Requires<[IsThumb, HasV5T, IsNotDarwin]> {
bits<21> func;
@@ -416,8 +436,8 @@ let isCall = 1,
}
// Also used for Thumb2
- def tBLXr : TI<(outs), (ins GPR:$func, variable_ops), IIC_Br,
- "blx\t$func",
+ def tBLXr : TI<(outs), (ins pred:$p, GPR:$func, variable_ops), IIC_Br,
+ "blx${p}\t$func",
[(ARMtcall GPR:$func)]>,
Requires<[IsThumb, HasV5T, IsNotDarwin]>,
T1Special<{1,1,1,?}> { // A6.2.3 & A8.6.24;
@@ -440,43 +460,22 @@ let isCall = 1,
Defs = [R0, R1, R2, R3, R9, R12, LR, QQQQ0, QQQQ2, QQQQ3, CPSR, FPSCR],
Uses = [R7, SP] in {
// Also used for Thumb2
- def tBLr9 : TIx2<0b11110, 0b11, 1,
- (outs), (ins pred:$p, t_bltarget:$func, variable_ops),
- IIC_Br, "bl${p}\t$func",
- [(ARMtcall tglobaladdr:$func)]>,
- Requires<[IsThumb, IsDarwin]> {
- bits<21> func;
- let Inst{25-16} = func{20-11};
- let Inst{13} = 1;
- let Inst{11} = 1;
- let Inst{10-0} = func{10-0};
- }
+ def tBLr9 : tPseudoExpand<(outs), (ins pred:$p, t_bltarget:$func, variable_ops),
+ 4, IIC_Br, [(ARMtcall tglobaladdr:$func)],
+ (tBL pred:$p, t_bltarget:$func)>,
+ Requires<[IsThumb, IsDarwin]>;
// ARMv5T and above, also used for Thumb2
- def tBLXi_r9 : TIx2<0b11110, 0b11, 0,
- (outs), (ins pred:$p, t_blxtarget:$func, variable_ops),
- IIC_Br, "blx${p}\t$func",
- [(ARMcall tglobaladdr:$func)]>,
- Requires<[IsThumb, HasV5T, IsDarwin]> {
- bits<21> func;
- let Inst{25-16} = func{20-11};
- let Inst{13} = 1;
- let Inst{11} = 1;
- let Inst{10-1} = func{10-1};
- let Inst{0} = 0; // func{0} is assumed zero
- }
+ def tBLXi_r9 : tPseudoExpand<(outs), (ins pred:$p, t_blxtarget:$func, variable_ops),
+ 4, IIC_Br, [(ARMcall tglobaladdr:$func)],
+ (tBLXi pred:$p, t_blxtarget:$func)>,
+ Requires<[IsThumb, HasV5T, IsDarwin]>;
// Also used for Thumb2
- def tBLXr_r9 : TI<(outs), (ins pred:$p, GPR:$func, variable_ops), IIC_Br,
- "blx${p}\t$func",
- [(ARMtcall GPR:$func)]>,
- Requires<[IsThumb, HasV5T, IsDarwin]>,
- T1Special<{1,1,1,?}> {
- // A6.2.3 & A8.6.24
- bits<4> func;
- let Inst{6-3} = func;
- let Inst{2-0} = 0b000;
- }
+ def tBLXr_r9 : tPseudoExpand<(outs), (ins pred:$p, GPR:$func, variable_ops),
+ 2, IIC_Br, [(ARMtcall GPR:$func)],
+ (tBLXr pred:$p, GPR:$func)>,
+ Requires<[IsThumb, HasV5T, IsDarwin]>;
// ARMv4T
def tBXr9_CALL : tPseudoInst<(outs), (ins tGPR:$func, variable_ops),
@@ -487,8 +486,8 @@ let isCall = 1,
let isBranch = 1, isTerminator = 1, isBarrier = 1 in {
let isPredicable = 1 in
- def tB : T1I<(outs), (ins t_brtarget:$target), IIC_Br,
- "b\t$target", [(br bb:$target)]>,
+ def tB : T1pI<(outs), (ins t_brtarget:$target), IIC_Br,
+ "b", "\t$target", [(br bb:$target)]>,
T1Encoding<{1,1,1,0,0,?}> {
bits<11> target;
let Inst{10-0} = target;
@@ -498,8 +497,8 @@ let isBranch = 1, isTerminator = 1, isBarrier = 1 in {
// Just a pseudo for a tBL instruction. Needed to let regalloc know about
// the clobber of LR.
let Defs = [LR] in
- def tBfar : tPseudoExpand<(outs), (ins t_bltarget:$target),
- 4, IIC_Br, [], (tBL t_bltarget:$target)>;
+ def tBfar : tPseudoExpand<(outs), (ins t_bltarget:$target, pred:$p),
+ 4, IIC_Br, [], (tBL pred:$p, t_bltarget:$target)>;
def tBR_JTr : tPseudoInst<(outs),
(ins tGPR:$target, i32imm:$jt, i32imm:$id),
@@ -522,31 +521,6 @@ let isBranch = 1, isTerminator = 1 in
let Inst{7-0} = target;
}
-// Compare and branch on zero / non-zero
-let isBranch = 1, isTerminator = 1 in {
- def tCBZ : T1I<(outs), (ins tGPR:$Rn, t_cbtarget:$target), IIC_Br,
- "cbz\t$Rn, $target", []>,
- T1Misc<{0,0,?,1,?,?,?}> {
- // A8.6.27
- bits<6> target;
- bits<3> Rn;
- let Inst{9} = target{5};
- let Inst{7-3} = target{4-0};
- let Inst{2-0} = Rn;
- }
-
- def tCBNZ : T1I<(outs), (ins tGPR:$cmp, t_cbtarget:$target), IIC_Br,
- "cbnz\t$cmp, $target", []>,
- T1Misc<{1,0,?,1,?,?,?}> {
- // A8.6.27
- bits<6> target;
- bits<3> Rn;
- let Inst{9} = target{5};
- let Inst{7-3} = target{4-0};
- let Inst{2-0} = Rn;
- }
-}
-
// Tail calls
let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1 in {
// Darwin versions.
@@ -562,9 +536,10 @@ let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1 in {
// Non-Darwin versions (the difference is R9).
let Defs = [R0, R1, R2, R3, R12, QQQQ0, QQQQ2, QQQQ3, PC],
Uses = [SP] in {
- def tTAILJMPdND : tPseudoExpand<(outs), (ins t_brtarget:$dst, variable_ops),
+ def tTAILJMPdND : tPseudoExpand<(outs),
+ (ins t_brtarget:$dst, pred:$p, variable_ops),
4, IIC_Br, [],
- (tB t_brtarget:$dst)>,
+ (tB t_brtarget:$dst, pred:$p)>,
Requires<[IsThumb, IsNotDarwin]>;
def tTAILJMPrND : tPseudoExpand<(outs), (ins tcGPR:$dst, variable_ops),
4, IIC_Br, [],
@@ -574,11 +549,11 @@ let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1 in {
}
-// A8.6.218 Supervisor Call (Software Interrupt) -- for disassembly only
+// A8.6.218 Supervisor Call (Software Interrupt)
// A8.6.16 B: Encoding T1
// If Inst{11-8} == 0b1111 then SEE SVC
let isCall = 1, Uses = [SP] in
-def tSVC : T1pI<(outs), (ins i32imm:$imm), IIC_Br,
+def tSVC : T1pI<(outs), (ins imm0_255:$imm), IIC_Br,
"svc", "\t$imm", []>, Encoding16 {
bits<8> imm;
let Inst{15-12} = 0b1101;
@@ -653,17 +628,17 @@ defm tLDRH : thumb_ld_rr_ri_enc<0b101, 0b1000, t_addrmode_rrs2,
let AddedComplexity = 10 in
def tLDRSB : // A8.6.80
- T1pILdStEncode<0b011, (outs tGPR:$dst), (ins t_addrmode_rr:$addr),
+ T1pILdStEncode<0b011, (outs tGPR:$Rt), (ins t_addrmode_rr:$addr),
AddrModeT1_1, IIC_iLoad_bh_r,
- "ldrsb", "\t$dst, $addr",
- [(set tGPR:$dst, (sextloadi8 t_addrmode_rr:$addr))]>;
+ "ldrsb", "\t$Rt, $addr",
+ [(set tGPR:$Rt, (sextloadi8 t_addrmode_rr:$addr))]>;
let AddedComplexity = 10 in
def tLDRSH : // A8.6.84
- T1pILdStEncode<0b111, (outs tGPR:$dst), (ins t_addrmode_rr:$addr),
+ T1pILdStEncode<0b111, (outs tGPR:$Rt), (ins t_addrmode_rr:$addr),
AddrModeT1_2, IIC_iLoad_bh_r,
- "ldrsh", "\t$dst, $addr",
- [(set tGPR:$dst, (sextloadi16 t_addrmode_rr:$addr))]>;
+ "ldrsh", "\t$Rt, $addr",
+ [(set tGPR:$Rt, (sextloadi16 t_addrmode_rr:$addr))]>;
let canFoldAsLoad = 1 in
def tLDRspi : T1pIs<(outs tGPR:$Rt), (ins t_addrmode_sp:$addr), IIC_iLoad_i,
@@ -678,7 +653,7 @@ def tLDRspi : T1pIs<(outs tGPR:$Rt), (ins t_addrmode_sp:$addr), IIC_iLoad_i,
// Load tconstpool
// FIXME: Use ldr.n to work around a Darwin assembler bug.
-let canFoldAsLoad = 1, isReMaterializable = 1 in
+let canFoldAsLoad = 1, isReMaterializable = 1, isCodeGenOnly = 1 in
def tLDRpci : T1pIs<(outs tGPR:$Rt), (ins t_addrmode_pc:$addr), IIC_iLoad_i,
"ldr", ".n\t$Rt, $addr",
[(set tGPR:$Rt, (load (ARMWrapper tconstpool:$addr)))]>,
@@ -736,42 +711,53 @@ def tSTRspi : T1pIs<(outs), (ins tGPR:$Rt, t_addrmode_sp:$addr), IIC_iStore_i,
// Load / store multiple Instructions.
//
-multiclass thumb_ldst_mult<string asm, InstrItinClass itin,
- InstrItinClass itin_upd, bits<6> T1Enc,
- bit L_bit> {
- def IA :
- T1I<(outs), (ins GPR:$Rn, pred:$p, reglist:$regs, variable_ops),
- itin, !strconcat(asm, "ia${p}\t$Rn, $regs"), []>,
- T1Encoding<T1Enc> {
- bits<3> Rn;
- bits<8> regs;
- let Inst{10-8} = Rn;
- let Inst{7-0} = regs;
- }
- def IA_UPD :
- T1It<(outs GPR:$wb), (ins GPR:$Rn, pred:$p, reglist:$regs, variable_ops),
- itin_upd, !strconcat(asm, "ia${p}\t$Rn!, $regs"), "$Rn = $wb", []>,
- T1Encoding<T1Enc> {
- bits<3> Rn;
- bits<8> regs;
- let Inst{10-8} = Rn;
- let Inst{7-0} = regs;
- }
-}
-
// These require base address to be written back or one of the loaded regs.
let neverHasSideEffects = 1 in {
let mayLoad = 1, hasExtraDefRegAllocReq = 1 in
-defm tLDM : thumb_ldst_mult<"ldm", IIC_iLoad_m, IIC_iLoad_mu,
- {1,1,0,0,1,?}, 1>;
-
+def tLDMIA : T1I<(outs), (ins tGPR:$Rn, pred:$p, reglist:$regs, variable_ops),
+ IIC_iLoad_m, "ldm${p}\t$Rn, $regs", []>, T1Encoding<{1,1,0,0,1,?}> {
+ bits<3> Rn;
+ bits<8> regs;
+ let Inst{10-8} = Rn;
+ let Inst{7-0} = regs;
+}
+
+// Writeback version is just a pseudo, as there's no encoding difference.
+// Writeback happens iff the base register is not in the destination register
+// list.
+def tLDMIA_UPD :
+ InstTemplate<AddrModeNone, 0, IndexModeNone, Pseudo, GenericDomain,
+ "$Rn = $wb", IIC_iLoad_mu>,
+ PseudoInstExpansion<(tLDMIA tGPR:$Rn, pred:$p, reglist:$regs)> {
+ let Size = 2;
+ let OutOperandList = (outs GPR:$wb);
+ let InOperandList = (ins GPR:$Rn, pred:$p, reglist:$regs, variable_ops);
+ let Pattern = [];
+ let isCodeGenOnly = 1;
+ let isPseudo = 1;
+ list<Predicate> Predicates = [IsThumb];
+}
+
+// There is no non-writeback version of STM for Thumb.
let mayStore = 1, hasExtraSrcRegAllocReq = 1 in
-defm tSTM : thumb_ldst_mult<"stm", IIC_iStore_m, IIC_iStore_mu,
- {1,1,0,0,0,?}, 0>;
+def tSTMIA_UPD : Thumb1I<(outs GPR:$wb),
+ (ins tGPR:$Rn, pred:$p, reglist:$regs, variable_ops),
+ AddrModeNone, 2, IIC_iStore_mu,
+ "stm${p}\t$Rn!, $regs", "$Rn = $wb", []>,
+ T1Encoding<{1,1,0,0,0,?}> {
+ bits<3> Rn;
+ bits<8> regs;
+ let Inst{10-8} = Rn;
+ let Inst{7-0} = regs;
+}
} // neverHasSideEffects
+def : InstAlias<"ldm${p} $Rn!, $regs",
+ (tLDMIA tGPR:$Rn, pred:$p, reglist:$regs)>,
+ Requires<[IsThumb, IsThumb1Only]>;
+
let mayLoad = 1, Uses = [SP], Defs = [SP], hasExtraDefRegAllocReq = 1 in
def tPOP : T1I<(outs), (ins pred:$p, reglist:$regs, variable_ops),
IIC_iPop,
@@ -876,7 +862,7 @@ def tADC : // A8.6.2
// Add immediate
def tADDi3 : // A8.6.4 T1
- T1sIGenEncodeImm<0b01110, (outs tGPR:$Rd), (ins tGPR:$Rm, i32imm:$imm3),
+ T1sIGenEncodeImm<0b01110, (outs tGPR:$Rd), (ins tGPR:$Rm, imm0_7:$imm3),
IIC_iALUi,
"add", "\t$Rd, $Rm, $imm3",
[(set tGPR:$Rd, (add tGPR:$Rm, imm0_7:$imm3))]> {
@@ -885,8 +871,8 @@ def tADDi3 : // A8.6.4 T1
}
def tADDi8 : // A8.6.4 T2
- T1sItGenEncodeImm<{1,1,0,?,?}, (outs tGPR:$Rdn), (ins tGPR:$Rn, i32imm:$imm8),
- IIC_iALUi,
+ T1sItGenEncodeImm<{1,1,0,?,?}, (outs tGPR:$Rdn),
+ (ins tGPR:$Rn, imm0_255:$imm8), IIC_iALUi,
"add", "\t$Rdn, $imm8",
[(set tGPR:$Rdn, (add tGPR:$Rn, imm8_255:$imm8))]>;
@@ -920,10 +906,10 @@ def tAND : // A8.6.12
// ASR immediate
def tASRri : // A8.6.14
- T1sIGenEncodeImm<{0,1,0,?,?}, (outs tGPR:$Rd), (ins tGPR:$Rm, i32imm:$imm5),
+ T1sIGenEncodeImm<{0,1,0,?,?}, (outs tGPR:$Rd), (ins tGPR:$Rm, imm_sr:$imm5),
IIC_iMOVsi,
"asr", "\t$Rd, $Rm, $imm5",
- [(set tGPR:$Rd, (sra tGPR:$Rm, (i32 imm:$imm5)))]> {
+ [(set tGPR:$Rd, (sra tGPR:$Rm, (i32 imm_sr:$imm5)))]> {
bits<5> imm5;
let Inst{10-6} = imm5;
}
@@ -962,7 +948,7 @@ def tCMNz : // A8.6.33
// CMP immediate
let isCompare = 1, Defs = [CPSR] in {
-def tCMPi8 : T1pI<(outs), (ins tGPR:$Rn, i32imm:$imm8), IIC_iCMPi,
+def tCMPi8 : T1pI<(outs), (ins tGPR:$Rn, imm0_255:$imm8), IIC_iCMPi,
"cmp", "\t$Rn, $imm8",
[(ARMcmp tGPR:$Rn, imm0_255:$imm8)]>,
T1General<{1,0,1,?,?}> {
@@ -1003,7 +989,7 @@ def tEOR : // A8.6.45
// LSL immediate
def tLSLri : // A8.6.88
- T1sIGenEncodeImm<{0,0,0,?,?}, (outs tGPR:$Rd), (ins tGPR:$Rm, i32imm:$imm5),
+ T1sIGenEncodeImm<{0,0,0,?,?}, (outs tGPR:$Rd), (ins tGPR:$Rm, imm0_31:$imm5),
IIC_iMOVsi,
"lsl", "\t$Rd, $Rm, $imm5",
[(set tGPR:$Rd, (shl tGPR:$Rm, (i32 imm:$imm5)))]> {
@@ -1020,10 +1006,10 @@ def tLSLrr : // A8.6.89
// LSR immediate
def tLSRri : // A8.6.90
- T1sIGenEncodeImm<{0,0,1,?,?}, (outs tGPR:$Rd), (ins tGPR:$Rm, i32imm:$imm5),
+ T1sIGenEncodeImm<{0,0,1,?,?}, (outs tGPR:$Rd), (ins tGPR:$Rm, imm_sr:$imm5),
IIC_iMOVsi,
"lsr", "\t$Rd, $Rm, $imm5",
- [(set tGPR:$Rd, (srl tGPR:$Rm, (i32 imm:$imm5)))]> {
+ [(set tGPR:$Rd, (srl tGPR:$Rm, (i32 imm_sr:$imm5)))]> {
bits<5> imm5;
let Inst{10-6} = imm5;
}
@@ -1047,6 +1033,10 @@ def tMOVi8 : T1sI<(outs tGPR:$Rd), (ins imm0_255:$imm8), IIC_iMOVi,
let Inst{10-8} = Rd;
let Inst{7-0} = imm8;
}
+// Because we have an explicit tMOVSr below, we need an alias to handle
+// the immediate "movs" form here. Blech.
+def : tInstAlias <"movs $Rdn, $imm",
+ (tMOVi8 tGPR:$Rdn, CPSR, imm0_255:$imm, 14, 0)>;
// A7-73: MOV(2) - mov setting flag.
@@ -1077,10 +1067,19 @@ def tMOVSr : T1I<(outs tGPR:$Rd), (ins tGPR:$Rm), IIC_iMOVr,
// Multiply register
let isCommutable = 1 in
def tMUL : // A8.6.105 T1
- T1sItDPEncode<0b1101, (outs tGPR:$Rdn), (ins tGPR:$Rn, tGPR:$Rm),
- IIC_iMUL32,
- "mul", "\t$Rdn, $Rm, $Rdn",
- [(set tGPR:$Rdn, (mul tGPR:$Rn, tGPR:$Rm))]>;
+ Thumb1sI<(outs tGPR:$Rd), (ins tGPR:$Rn, tGPR:$Rm), AddrModeNone, 2,
+ IIC_iMUL32, "mul", "\t$Rd, $Rn, $Rm", "$Rm = $Rd",
+ [(set tGPR:$Rd, (mul tGPR:$Rn, tGPR:$Rm))]>,
+ T1DataProcessing<0b1101> {
+ bits<3> Rd;
+ bits<3> Rn;
+ let Inst{5-3} = Rn;
+ let Inst{2-0} = Rd;
+ let AsmMatchConverter = "cvtThumbMultiply";
+}
+
+def :tInstAlias<"mul${s}${p} $Rdm, $Rn", (tMUL tGPR:$Rdm, s_cc_out:$s, tGPR:$Rn,
+ pred:$p)>;
// Move inverse register
def tMVN : // A8.6.107
@@ -1132,6 +1131,9 @@ def tRSB : // A8.6.141
"rsb", "\t$Rd, $Rn, #0",
[(set tGPR:$Rd, (ineg tGPR:$Rn))]>;
+def : tInstAlias<"neg${s}${p} $Rd, $Rm",
+ (tRSB tGPR:$Rd, s_cc_out:$s, tGPR:$Rm, pred:$p)>;
+
// Subtract with carry register
let Uses = [CPSR] in
def tSBC : // A8.6.151
@@ -1142,7 +1144,7 @@ def tSBC : // A8.6.151
// Subtract immediate
def tSUBi3 : // A8.6.210 T1
- T1sIGenEncodeImm<0b01111, (outs tGPR:$Rd), (ins tGPR:$Rm, i32imm:$imm3),
+ T1sIGenEncodeImm<0b01111, (outs tGPR:$Rd), (ins tGPR:$Rm, imm0_7:$imm3),
IIC_iALUi,
"sub", "\t$Rd, $Rm, $imm3",
[(set tGPR:$Rd, (add tGPR:$Rm, imm0_7_neg:$imm3))]> {
@@ -1151,8 +1153,8 @@ def tSUBi3 : // A8.6.210 T1
}
def tSUBi8 : // A8.6.210 T2
- T1sItGenEncodeImm<{1,1,1,?,?}, (outs tGPR:$Rdn), (ins tGPR:$Rn, i32imm:$imm8),
- IIC_iALUi,
+ T1sItGenEncodeImm<{1,1,1,?,?}, (outs tGPR:$Rdn),
+ (ins tGPR:$Rn, imm0_255:$imm8), IIC_iALUi,
"sub", "\t$Rdn, $imm8",
[(set tGPR:$Rdn, (add tGPR:$Rn, imm8_255_neg:$imm8))]>;
@@ -1163,8 +1165,6 @@ def tSUBrr : // A8.6.212
"sub", "\t$Rd, $Rn, $Rm",
[(set tGPR:$Rd, (sub tGPR:$Rn, tGPR:$Rm))]>;
-// TODO: A7-96: STMIA - store multiple.
-
// Sign-extend byte
def tSXTB : // A8.6.222
T1pIMiscEncode<{0,0,1,0,0,1,?}, (outs tGPR:$Rd), (ins tGPR:$Rm),
@@ -1216,12 +1216,13 @@ let usesCustomInserter = 1 in // Expanded after instruction selection.
// assembler.
def tADR : T1I<(outs tGPR:$Rd), (ins t_adrlabel:$addr, pred:$p),
- IIC_iALUi, "adr{$p}\t$Rd, #$addr", []>,
+ IIC_iALUi, "adr{$p}\t$Rd, $addr", []>,
T1Encoding<{1,0,1,0,0,?}> {
bits<3> Rd;
bits<8> addr;
let Inst{10-8} = Rd;
let Inst{7-0} = addr;
+ let DecoderMethod = "DecodeThumbAddSpecialReg";
}
let neverHasSideEffects = 1, isReMaterializable = 1 in
@@ -1361,6 +1362,31 @@ def : T1Pat<(sextloadi16 t_addrmode_rrs2:$addr),
def : T1Pat<(sextloadi16 t_addrmode_is2:$addr),
(tASRri (tLSLri (tLDRHi t_addrmode_is2:$addr), 16), 16)>;
+def : T1Pat<(atomic_load_8 t_addrmode_is1:$src),
+ (tLDRBi t_addrmode_is1:$src)>;
+def : T1Pat<(atomic_load_8 t_addrmode_rrs1:$src),
+ (tLDRBr t_addrmode_rrs1:$src)>;
+def : T1Pat<(atomic_load_16 t_addrmode_is2:$src),
+ (tLDRHi t_addrmode_is2:$src)>;
+def : T1Pat<(atomic_load_16 t_addrmode_rrs2:$src),
+ (tLDRHr t_addrmode_rrs2:$src)>;
+def : T1Pat<(atomic_load_32 t_addrmode_is4:$src),
+ (tLDRi t_addrmode_is4:$src)>;
+def : T1Pat<(atomic_load_32 t_addrmode_rrs4:$src),
+ (tLDRr t_addrmode_rrs4:$src)>;
+def : T1Pat<(atomic_store_8 t_addrmode_is1:$ptr, tGPR:$val),
+ (tSTRBi tGPR:$val, t_addrmode_is1:$ptr)>;
+def : T1Pat<(atomic_store_8 t_addrmode_rrs1:$ptr, tGPR:$val),
+ (tSTRBr tGPR:$val, t_addrmode_rrs1:$ptr)>;
+def : T1Pat<(atomic_store_16 t_addrmode_is2:$ptr, tGPR:$val),
+ (tSTRHi tGPR:$val, t_addrmode_is2:$ptr)>;
+def : T1Pat<(atomic_store_16 t_addrmode_rrs2:$ptr, tGPR:$val),
+ (tSTRHr tGPR:$val, t_addrmode_rrs2:$ptr)>;
+def : T1Pat<(atomic_store_32 t_addrmode_is4:$ptr, tGPR:$val),
+ (tSTRi tGPR:$val, t_addrmode_is4:$ptr)>;
+def : T1Pat<(atomic_store_32 t_addrmode_rrs4:$ptr, tGPR:$val),
+ (tSTRr tGPR:$val, t_addrmode_rrs4:$ptr)>;
+
// Large immediate handling.
// Two piece imms.
@@ -1395,3 +1421,16 @@ let isBranch = 1, isTerminator = 1, isBarrier = 1, isIndirectBranch = 1 in {
2, IIC_Br, [(brind GPR:$Rm)],
(tMOVr PC, GPR:$Rm, pred:$p)>;
}
+
+
+// In Thumb1, "nop" is encoded as a "mov r8, r8". Technically, the bf00
+// encoding is available on ARMv6K, but we don't differentiate that finely.
+def : InstAlias<"nop", (tMOVr R8, R8, 14, 0)>,Requires<[IsThumb, IsThumb1Only]>;
+
+
+// For round-trip assembly/disassembly, we have to handle a CPS instruction
+// without any iflags. That's not, strictly speaking, valid syntax, but it's
+// a useful extention and assembles to defined behaviour (the insn does
+// nothing).
+def : tInstAlias<"cps$imod", (tCPS imod_op:$imod, 0)>;
+def : tInstAlias<"cps$imod", (tCPS imod_op:$imod, 0)>;
diff --git a/contrib/llvm/lib/Target/ARM/ARMInstrThumb2.td b/contrib/llvm/lib/Target/ARM/ARMInstrThumb2.td
index c2c6cbc..471ec29 100644
--- a/contrib/llvm/lib/Target/ARM/ARMInstrThumb2.td
+++ b/contrib/llvm/lib/Target/ARM/ARMInstrThumb2.td
@@ -12,13 +12,32 @@
//===----------------------------------------------------------------------===//
// IT block predicate field
+def it_pred_asmoperand : AsmOperandClass {
+ let Name = "ITCondCode";
+ let ParserMethod = "parseITCondCode";
+}
def it_pred : Operand<i32> {
let PrintMethod = "printMandatoryPredicateOperand";
+ let ParserMatchClass = it_pred_asmoperand;
}
// IT block condition mask
+def it_mask_asmoperand : AsmOperandClass { let Name = "ITMask"; }
def it_mask : Operand<i32> {
let PrintMethod = "printThumbITMask";
+ let ParserMatchClass = it_mask_asmoperand;
+}
+
+// t2_shift_imm: An integer that encodes a shift amount and the type of shift
+// (asr or lsl). The 6-bit immediate encodes as:
+// {5} 0 ==> lsl
+// 1 asr
+// {4-0} imm5 shift amount.
+// asr #32 not allowed
+def t2_shift_imm : Operand<i32> {
+ let PrintMethod = "printShiftImmOperand";
+ let ParserMatchClass = ShifterImmAsmOperand;
+ let DecoderMethod = "DecodeT2ShifterImmOperand";
}
// Shifted operands. No register controlled shifts for Thumb2.
@@ -28,6 +47,8 @@ def t2_so_reg : Operand<i32>, // reg imm
[shl,srl,sra,rotr]> {
let EncoderMethod = "getT2SORegOpValue";
let PrintMethod = "printT2SOOperand";
+ let DecoderMethod = "DecodeSORegImmOperand";
+ let ParserMatchClass = ShiftedImmAsmOperand;
let MIOperandInfo = (ops rGPR, i32imm);
}
@@ -50,6 +71,7 @@ def t2_so_imm : Operand<i32>, ImmLeaf<i32, [{
}]> {
let ParserMatchClass = t2_so_imm_asmoperand;
let EncoderMethod = "getT2SOImmOpValue";
+ let DecoderMethod = "DecodeT2SOImm";
}
// t2_so_imm_not - Match an immediate that is a complement
@@ -65,11 +87,6 @@ def t2_so_imm_neg : Operand<i32>,
return ARM_AM::getT2SOImmVal(-((uint32_t)N->getZExtValue())) != -1;
}], t2_so_imm_neg_XFORM>;
-/// imm1_31 predicate - True if the 32-bit immediate is in the range [1,31].
-def imm1_31 : ImmLeaf<i32, [{
- return (int32_t)Imm >= 1 && (int32_t)Imm < 32;
-}]>;
-
/// imm0_4095 predicate - True if the 32-bit immediate is in the range [0.4095].
def imm0_4095 : Operand<i32>,
ImmLeaf<i32, [{
@@ -96,17 +113,20 @@ def lo5AllOne : PatLeaf<(i32 imm), [{
// Define Thumb2 specific addressing modes.
// t2addrmode_imm12 := reg + imm12
+def t2addrmode_imm12_asmoperand : AsmOperandClass {let Name="MemUImm12Offset";}
def t2addrmode_imm12 : Operand<i32>,
ComplexPattern<i32, 2, "SelectT2AddrModeImm12", []> {
let PrintMethod = "printAddrModeImm12Operand";
let EncoderMethod = "getAddrModeImm12OpValue";
+ let DecoderMethod = "DecodeT2AddrModeImm12";
+ let ParserMatchClass = t2addrmode_imm12_asmoperand;
let MIOperandInfo = (ops GPR:$base, i32imm:$offsimm);
- let ParserMatchClass = MemMode5AsmOperand;
}
// t2ldrlabel := imm12
def t2ldrlabel : Operand<i32> {
let EncoderMethod = "getAddrModeImm12OpValue";
+ let PrintMethod = "printT2LdrLabelOperand";
}
@@ -116,13 +136,36 @@ def t2adrlabel : Operand<i32> {
}
+// t2addrmode_posimm8 := reg + imm8
+def MemPosImm8OffsetAsmOperand : AsmOperandClass {let Name="MemPosImm8Offset";}
+def t2addrmode_posimm8 : Operand<i32> {
+ let PrintMethod = "printT2AddrModeImm8Operand";
+ let EncoderMethod = "getT2AddrModeImm8OpValue";
+ let DecoderMethod = "DecodeT2AddrModeImm8";
+ let ParserMatchClass = MemPosImm8OffsetAsmOperand;
+ let MIOperandInfo = (ops GPR:$base, i32imm:$offsimm);
+}
+
+// t2addrmode_negimm8 := reg - imm8
+def MemNegImm8OffsetAsmOperand : AsmOperandClass {let Name="MemNegImm8Offset";}
+def t2addrmode_negimm8 : Operand<i32>,
+ ComplexPattern<i32, 2, "SelectT2AddrModeImm8", []> {
+ let PrintMethod = "printT2AddrModeImm8Operand";
+ let EncoderMethod = "getT2AddrModeImm8OpValue";
+ let DecoderMethod = "DecodeT2AddrModeImm8";
+ let ParserMatchClass = MemNegImm8OffsetAsmOperand;
+ let MIOperandInfo = (ops GPR:$base, i32imm:$offsimm);
+}
+
// t2addrmode_imm8 := reg +/- imm8
+def MemImm8OffsetAsmOperand : AsmOperandClass { let Name = "MemImm8Offset"; }
def t2addrmode_imm8 : Operand<i32>,
ComplexPattern<i32, 2, "SelectT2AddrModeImm8", []> {
let PrintMethod = "printT2AddrModeImm8Operand";
let EncoderMethod = "getT2AddrModeImm8OpValue";
+ let DecoderMethod = "DecodeT2AddrModeImm8";
+ let ParserMatchClass = MemImm8OffsetAsmOperand;
let MIOperandInfo = (ops GPR:$base, i32imm:$offsimm);
- let ParserMatchClass = MemMode5AsmOperand;
}
def t2am_imm8_offset : Operand<i32>,
@@ -130,38 +173,61 @@ def t2am_imm8_offset : Operand<i32>,
[], [SDNPWantRoot]> {
let PrintMethod = "printT2AddrModeImm8OffsetOperand";
let EncoderMethod = "getT2AddrModeImm8OffsetOpValue";
- let ParserMatchClass = MemMode5AsmOperand;
+ let DecoderMethod = "DecodeT2Imm8";
}
// t2addrmode_imm8s4 := reg +/- (imm8 << 2)
+def MemImm8s4OffsetAsmOperand : AsmOperandClass {let Name = "MemImm8s4Offset";}
def t2addrmode_imm8s4 : Operand<i32> {
let PrintMethod = "printT2AddrModeImm8s4Operand";
let EncoderMethod = "getT2AddrModeImm8s4OpValue";
+ let DecoderMethod = "DecodeT2AddrModeImm8s4";
+ let ParserMatchClass = MemImm8s4OffsetAsmOperand;
let MIOperandInfo = (ops GPR:$base, i32imm:$offsimm);
- let ParserMatchClass = MemMode5AsmOperand;
}
+def t2am_imm8s4_offset_asmoperand : AsmOperandClass { let Name = "Imm8s4"; }
def t2am_imm8s4_offset : Operand<i32> {
let PrintMethod = "printT2AddrModeImm8s4OffsetOperand";
+ let EncoderMethod = "getT2Imm8s4OpValue";
+ let DecoderMethod = "DecodeT2Imm8S4";
+}
+
+// t2addrmode_imm0_1020s4 := reg + (imm8 << 2)
+def MemImm0_1020s4OffsetAsmOperand : AsmOperandClass {
+ let Name = "MemImm0_1020s4Offset";
+}
+def t2addrmode_imm0_1020s4 : Operand<i32> {
+ let PrintMethod = "printT2AddrModeImm0_1020s4Operand";
+ let EncoderMethod = "getT2AddrModeImm0_1020s4OpValue";
+ let DecoderMethod = "DecodeT2AddrModeImm0_1020s4";
+ let ParserMatchClass = MemImm0_1020s4OffsetAsmOperand;
+ let MIOperandInfo = (ops GPRnopc:$base, i32imm:$offsimm);
}
// t2addrmode_so_reg := reg + (reg << imm2)
+def t2addrmode_so_reg_asmoperand : AsmOperandClass {let Name="T2MemRegOffset";}
def t2addrmode_so_reg : Operand<i32>,
ComplexPattern<i32, 3, "SelectT2AddrModeSoReg", []> {
let PrintMethod = "printT2AddrModeSoRegOperand";
let EncoderMethod = "getT2AddrModeSORegOpValue";
+ let DecoderMethod = "DecodeT2AddrModeSOReg";
+ let ParserMatchClass = t2addrmode_so_reg_asmoperand;
let MIOperandInfo = (ops GPR:$base, rGPR:$offsreg, i32imm:$offsimm);
- let ParserMatchClass = MemMode5AsmOperand;
}
-// t2addrmode_reg := reg
-// Used by load/store exclusive instructions. Useful to enable right assembly
-// parsing and printing. Not used for any codegen matching.
-//
-def t2addrmode_reg : Operand<i32> {
- let PrintMethod = "printAddrMode7Operand";
- let MIOperandInfo = (ops GPR);
- let ParserMatchClass = MemMode7AsmOperand;
+// Addresses for the TBB/TBH instructions.
+def addrmode_tbb_asmoperand : AsmOperandClass { let Name = "MemTBB"; }
+def addrmode_tbb : Operand<i32> {
+ let PrintMethod = "printAddrModeTBB";
+ let ParserMatchClass = addrmode_tbb_asmoperand;
+ let MIOperandInfo = (ops GPR:$Rn, rGPR:$Rm);
+}
+def addrmode_tbh_asmoperand : AsmOperandClass { let Name = "MemTBH"; }
+def addrmode_tbh : Operand<i32> {
+ let PrintMethod = "printAddrModeTBH";
+ let ParserMatchClass = addrmode_tbh_asmoperand;
+ let MIOperandInfo = (ops GPR:$Rn, rGPR:$Rm);
}
//===----------------------------------------------------------------------===//
@@ -419,47 +485,6 @@ class T2MulLong<bits<3> opc22_20, bits<4> opc7_4,
}
-/// T2I_un_irs - Defines a set of (op reg, {so_imm|r|so_reg}) patterns for a
-/// unary operation that produces a value. These are predicable and can be
-/// changed to modify CPSR.
-multiclass T2I_un_irs<bits<4> opcod, string opc,
- InstrItinClass iii, InstrItinClass iir, InstrItinClass iis,
- PatFrag opnode, bit Cheap = 0, bit ReMat = 0> {
- // shifted imm
- def i : T2sOneRegImm<(outs rGPR:$Rd), (ins t2_so_imm:$imm), iii,
- opc, "\t$Rd, $imm",
- [(set rGPR:$Rd, (opnode t2_so_imm:$imm))]> {
- let isAsCheapAsAMove = Cheap;
- let isReMaterializable = ReMat;
- let Inst{31-27} = 0b11110;
- let Inst{25} = 0;
- let Inst{24-21} = opcod;
- let Inst{19-16} = 0b1111; // Rn
- let Inst{15} = 0;
- }
- // register
- def r : T2sTwoReg<(outs rGPR:$Rd), (ins rGPR:$Rm), iir,
- opc, ".w\t$Rd, $Rm",
- [(set rGPR:$Rd, (opnode rGPR:$Rm))]> {
- let Inst{31-27} = 0b11101;
- let Inst{26-25} = 0b01;
- let Inst{24-21} = opcod;
- let Inst{19-16} = 0b1111; // Rn
- let Inst{14-12} = 0b000; // imm3
- let Inst{7-6} = 0b00; // imm2
- let Inst{5-4} = 0b00; // type
- }
- // shifted register
- def s : T2sOneRegShiftedReg<(outs rGPR:$Rd), (ins t2_so_reg:$ShiftedRm), iis,
- opc, ".w\t$Rd, $ShiftedRm",
- [(set rGPR:$Rd, (opnode t2_so_reg:$ShiftedRm))]> {
- let Inst{31-27} = 0b11101;
- let Inst{26-25} = 0b01;
- let Inst{24-21} = opcod;
- let Inst{19-16} = 0b1111; // Rn
- }
-}
-
/// T2I_bin_irs - Defines a set of (op reg, {so_imm|r|so_reg}) patterns for a
/// binary operation that produces a value. These are predicable and can be
/// changed to modify CPSR.
@@ -500,21 +525,18 @@ multiclass T2I_bin_irs<bits<4> opcod, string opc,
}
// Assembly aliases for optional destination operand when it's the same
// as the source operand.
- def : InstAlias<!strconcat(opc, "${s}${p} $Rdn, $imm"),
+ def : t2InstAlias<!strconcat(opc, "${s}${p} $Rdn, $imm"),
(!cast<Instruction>(!strconcat(baseOpc, "ri")) rGPR:$Rdn, rGPR:$Rdn,
t2_so_imm:$imm, pred:$p,
- cc_out:$s)>,
- Requires<[IsThumb2]>;
- def : InstAlias<!strconcat(opc, "${s}${p}", wide, " $Rdn, $Rm"),
+ cc_out:$s)>;
+ def : t2InstAlias<!strconcat(opc, "${s}${p}", wide, " $Rdn, $Rm"),
(!cast<Instruction>(!strconcat(baseOpc, "rr")) rGPR:$Rdn, rGPR:$Rdn,
rGPR:$Rm, pred:$p,
- cc_out:$s)>,
- Requires<[IsThumb2]>;
- def : InstAlias<!strconcat(opc, "${s}${p}", wide, " $Rdn, $shift"),
+ cc_out:$s)>;
+ def : t2InstAlias<!strconcat(opc, "${s}${p}", wide, " $Rdn, $shift"),
(!cast<Instruction>(!strconcat(baseOpc, "rs")) rGPR:$Rdn, rGPR:$Rdn,
t2_so_reg:$shift, pred:$p,
- cc_out:$s)>,
- Requires<[IsThumb2]>;
+ cc_out:$s)>;
}
/// T2I_bin_w_irs - Same as T2I_bin_irs except these operations need
@@ -522,7 +544,27 @@ multiclass T2I_bin_irs<bits<4> opcod, string opc,
multiclass T2I_bin_w_irs<bits<4> opcod, string opc,
InstrItinClass iii, InstrItinClass iir, InstrItinClass iis,
PatFrag opnode, string baseOpc, bit Commutable = 0> :
- T2I_bin_irs<opcod, opc, iii, iir, iis, opnode, baseOpc, Commutable, ".w">;
+ T2I_bin_irs<opcod, opc, iii, iir, iis, opnode, baseOpc, Commutable, ".w"> {
+ // Assembler aliases w/o the ".w" suffix.
+ def : t2InstAlias<!strconcat(opc, "${s}${p}", " $Rd, $Rn, $Rm"),
+ (!cast<Instruction>(!strconcat(baseOpc, "rr")) rGPR:$Rd, rGPR:$Rn,
+ rGPR:$Rm, pred:$p,
+ cc_out:$s)>;
+ def : t2InstAlias<!strconcat(opc, "${s}${p}", " $Rd, $Rn, $shift"),
+ (!cast<Instruction>(!strconcat(baseOpc, "rs")) rGPR:$Rd, rGPR:$Rn,
+ t2_so_reg:$shift, pred:$p,
+ cc_out:$s)>;
+
+ // and with the optional destination operand, too.
+ def : t2InstAlias<!strconcat(opc, "${s}${p}", " $Rdn, $Rm"),
+ (!cast<Instruction>(!strconcat(baseOpc, "rr")) rGPR:$Rdn, rGPR:$Rdn,
+ rGPR:$Rm, pred:$p,
+ cc_out:$s)>;
+ def : t2InstAlias<!strconcat(opc, "${s}${p}", " $Rdn, $shift"),
+ (!cast<Instruction>(!strconcat(baseOpc, "rs")) rGPR:$Rdn, rGPR:$Rdn,
+ t2_so_reg:$shift, pred:$p,
+ cc_out:$s)>;
+}
/// T2I_rbin_is - Same as T2I_bin_irs except the order of operands are
/// reversed. The 'rr' form is only defined for the disassembler; for codegen
@@ -563,45 +605,28 @@ multiclass T2I_rbin_irs<bits<4> opcod, string opc, PatFrag opnode> {
/// T2I_bin_s_irs - Similar to T2I_bin_irs except it sets the 's' bit so the
/// instruction modifies the CPSR register.
-let isCodeGenOnly = 1, Defs = [CPSR] in {
+///
+/// These opcodes will be converted to the real non-S opcodes by
+/// AdjustInstrPostInstrSelection after giving then an optional CPSR operand.
+let hasPostISelHook = 1, isCodeGenOnly = 1, isPseudo = 1, Defs = [CPSR] in {
multiclass T2I_bin_s_irs<bits<4> opcod, string opc,
InstrItinClass iii, InstrItinClass iir, InstrItinClass iis,
PatFrag opnode, bit Commutable = 0> {
// shifted imm
- def ri : T2TwoRegImm<
+ def ri : T2sTwoRegImm<
(outs rGPR:$Rd), (ins GPR:$Rn, t2_so_imm:$imm), iii,
- !strconcat(opc, "s"), ".w\t$Rd, $Rn, $imm",
- [(set rGPR:$Rd, (opnode GPR:$Rn, t2_so_imm:$imm))]> {
- let Inst{31-27} = 0b11110;
- let Inst{25} = 0;
- let Inst{24-21} = opcod;
- let Inst{20} = 1; // The S bit.
- let Inst{15} = 0;
- }
+ opc, ".w\t$Rd, $Rn, $imm",
+ [(set rGPR:$Rd, CPSR, (opnode GPR:$Rn, t2_so_imm:$imm))]>;
// register
- def rr : T2ThreeReg<
+ def rr : T2sThreeReg<
(outs rGPR:$Rd), (ins GPR:$Rn, rGPR:$Rm), iir,
- !strconcat(opc, "s"), ".w\t$Rd, $Rn, $Rm",
- [(set rGPR:$Rd, (opnode GPR:$Rn, rGPR:$Rm))]> {
- let isCommutable = Commutable;
- let Inst{31-27} = 0b11101;
- let Inst{26-25} = 0b01;
- let Inst{24-21} = opcod;
- let Inst{20} = 1; // The S bit.
- let Inst{14-12} = 0b000; // imm3
- let Inst{7-6} = 0b00; // imm2
- let Inst{5-4} = 0b00; // type
- }
+ opc, ".w\t$Rd, $Rn, $Rm",
+ [(set rGPR:$Rd, CPSR, (opnode GPR:$Rn, rGPR:$Rm))]>;
// shifted register
- def rs : T2TwoRegShiftedReg<
+ def rs : T2sTwoRegShiftedReg<
(outs rGPR:$Rd), (ins GPR:$Rn, t2_so_reg:$ShiftedRm), iis,
- !strconcat(opc, "s"), ".w\t$Rd, $Rn, $ShiftedRm",
- [(set rGPR:$Rd, (opnode GPR:$Rn, t2_so_reg:$ShiftedRm))]> {
- let Inst{31-27} = 0b11101;
- let Inst{26-25} = 0b01;
- let Inst{24-21} = opcod;
- let Inst{20} = 1; // The S bit.
- }
+ opc, ".w\t$Rd, $Rn, $ShiftedRm",
+ [(set rGPR:$Rd, CPSR, (opnode GPR:$Rn, t2_so_reg:$ShiftedRm))]>;
}
}
@@ -614,9 +639,9 @@ multiclass T2I_bin_ii12rs<bits<3> op23_21, string opc, PatFrag opnode,
// in particular for taking the address of a local.
let isReMaterializable = 1 in {
def ri : T2sTwoRegImm<
- (outs rGPR:$Rd), (ins GPR:$Rn, t2_so_imm:$imm), IIC_iALUi,
- opc, ".w\t$Rd, $Rn, $imm",
- [(set rGPR:$Rd, (opnode GPR:$Rn, t2_so_imm:$imm))]> {
+ (outs GPRnopc:$Rd), (ins GPRnopc:$Rn, t2_so_imm:$imm), IIC_iALUi,
+ opc, ".w\t$Rd, $Rn, $imm",
+ [(set GPRnopc:$Rd, (opnode GPRnopc:$Rn, t2_so_imm:$imm))]> {
let Inst{31-27} = 0b11110;
let Inst{25} = 0;
let Inst{24} = 1;
@@ -626,9 +651,9 @@ multiclass T2I_bin_ii12rs<bits<3> op23_21, string opc, PatFrag opnode,
}
// 12-bit imm
def ri12 : T2I<
- (outs rGPR:$Rd), (ins GPR:$Rn, imm0_4095:$imm), IIC_iALUi,
+ (outs GPRnopc:$Rd), (ins GPR:$Rn, imm0_4095:$imm), IIC_iALUi,
!strconcat(opc, "w"), "\t$Rd, $Rn, $imm",
- [(set rGPR:$Rd, (opnode GPR:$Rn, imm0_4095:$imm))]> {
+ [(set GPRnopc:$Rd, (opnode GPR:$Rn, imm0_4095:$imm))]> {
bits<4> Rd;
bits<4> Rn;
bits<12> imm;
@@ -644,9 +669,9 @@ multiclass T2I_bin_ii12rs<bits<3> op23_21, string opc, PatFrag opnode,
let Inst{7-0} = imm{7-0};
}
// register
- def rr : T2sThreeReg<(outs rGPR:$Rd), (ins GPR:$Rn, rGPR:$Rm), IIC_iALUr,
- opc, ".w\t$Rd, $Rn, $Rm",
- [(set rGPR:$Rd, (opnode GPR:$Rn, rGPR:$Rm))]> {
+ def rr : T2sThreeReg<(outs GPRnopc:$Rd), (ins GPRnopc:$Rn, rGPR:$Rm),
+ IIC_iALUr, opc, ".w\t$Rd, $Rn, $Rm",
+ [(set GPRnopc:$Rd, (opnode GPRnopc:$Rn, rGPR:$Rm))]> {
let isCommutable = Commutable;
let Inst{31-27} = 0b11101;
let Inst{26-25} = 0b01;
@@ -658,9 +683,9 @@ multiclass T2I_bin_ii12rs<bits<3> op23_21, string opc, PatFrag opnode,
}
// shifted register
def rs : T2sTwoRegShiftedReg<
- (outs rGPR:$Rd), (ins GPR:$Rn, t2_so_reg:$ShiftedRm),
+ (outs GPRnopc:$Rd), (ins GPRnopc:$Rn, t2_so_reg:$ShiftedRm),
IIC_iALUsi, opc, ".w\t$Rd, $Rn, $ShiftedRm",
- [(set rGPR:$Rd, (opnode GPR:$Rn, t2_so_reg:$ShiftedRm))]> {
+ [(set GPRnopc:$Rd, (opnode GPRnopc:$Rn, t2_so_reg:$ShiftedRm))]> {
let Inst{31-27} = 0b11101;
let Inst{26-25} = 0b01;
let Inst{24} = 1;
@@ -671,13 +696,13 @@ multiclass T2I_bin_ii12rs<bits<3> op23_21, string opc, PatFrag opnode,
/// T2I_adde_sube_irs - Defines a set of (op reg, {so_imm|r|so_reg}) patterns
/// for a binary operation that produces a value and use the carry
/// bit. It's not predicable.
-let Uses = [CPSR] in {
+let Defs = [CPSR], Uses = [CPSR] in {
multiclass T2I_adde_sube_irs<bits<4> opcod, string opc, PatFrag opnode,
bit Commutable = 0> {
// shifted imm
def ri : T2sTwoRegImm<(outs rGPR:$Rd), (ins rGPR:$Rn, t2_so_imm:$imm),
IIC_iALUi, opc, "\t$Rd, $Rn, $imm",
- [(set rGPR:$Rd, (opnode rGPR:$Rn, t2_so_imm:$imm))]>,
+ [(set rGPR:$Rd, CPSR, (opnode rGPR:$Rn, t2_so_imm:$imm, CPSR))]>,
Requires<[IsThumb2]> {
let Inst{31-27} = 0b11110;
let Inst{25} = 0;
@@ -687,7 +712,7 @@ multiclass T2I_adde_sube_irs<bits<4> opcod, string opc, PatFrag opnode,
// register
def rr : T2sThreeReg<(outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm), IIC_iALUr,
opc, ".w\t$Rd, $Rn, $Rm",
- [(set rGPR:$Rd, (opnode rGPR:$Rn, rGPR:$Rm))]>,
+ [(set rGPR:$Rd, CPSR, (opnode rGPR:$Rn, rGPR:$Rm, CPSR))]>,
Requires<[IsThumb2]> {
let isCommutable = Commutable;
let Inst{31-27} = 0b11101;
@@ -701,7 +726,7 @@ multiclass T2I_adde_sube_irs<bits<4> opcod, string opc, PatFrag opnode,
def rs : T2sTwoRegShiftedReg<
(outs rGPR:$Rd), (ins rGPR:$Rn, t2_so_reg:$ShiftedRm),
IIC_iALUsi, opc, ".w\t$Rd, $Rn, $ShiftedRm",
- [(set rGPR:$Rd, (opnode rGPR:$Rn, t2_so_reg:$ShiftedRm))]>,
+ [(set rGPR:$Rd, CPSR, (opnode rGPR:$Rn, t2_so_reg:$ShiftedRm, CPSR))]>,
Requires<[IsThumb2]> {
let Inst{31-27} = 0b11101;
let Inst{26-25} = 0b01;
@@ -710,64 +735,35 @@ multiclass T2I_adde_sube_irs<bits<4> opcod, string opc, PatFrag opnode,
}
}
-// Carry setting variants
-// NOTE: CPSR def omitted because it will be handled by the custom inserter.
-let usesCustomInserter = 1 in {
-multiclass T2I_adde_sube_s_irs<PatFrag opnode, bit Commutable = 0> {
- // shifted imm
- def ri : t2PseudoInst<(outs rGPR:$Rd), (ins rGPR:$Rn, t2_so_imm:$imm),
- 4, IIC_iALUi,
- [(set rGPR:$Rd, (opnode rGPR:$Rn, t2_so_imm:$imm))]>;
- // register
- def rr : t2PseudoInst<(outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm),
- 4, IIC_iALUr,
- [(set rGPR:$Rd, (opnode rGPR:$Rn, rGPR:$Rm))]> {
- let isCommutable = Commutable;
- }
- // shifted register
- def rs : t2PseudoInst<
- (outs rGPR:$Rd), (ins rGPR:$Rn, t2_so_reg:$ShiftedRm),
- 4, IIC_iALUsi,
- [(set rGPR:$Rd, (opnode rGPR:$Rn, t2_so_reg:$ShiftedRm))]>;
-}
-}
-
/// T2I_rbin_s_is - Same as T2I_rbin_irs except sets 's' bit and the register
/// version is not needed since this is only for codegen.
-let isCodeGenOnly = 1, Defs = [CPSR] in {
+///
+/// These opcodes will be converted to the real non-S opcodes by
+/// AdjustInstrPostInstrSelection after giving then an optional CPSR operand.
+let hasPostISelHook = 1, isCodeGenOnly = 1, isPseudo = 1, Defs = [CPSR] in {
multiclass T2I_rbin_s_is<bits<4> opcod, string opc, PatFrag opnode> {
// shifted imm
- def ri : T2TwoRegImm<
+ def ri : T2sTwoRegImm<
(outs rGPR:$Rd), (ins rGPR:$Rn, t2_so_imm:$imm), IIC_iALUi,
- !strconcat(opc, "s"), ".w\t$Rd, $Rn, $imm",
- [(set rGPR:$Rd, (opnode t2_so_imm:$imm, rGPR:$Rn))]> {
- let Inst{31-27} = 0b11110;
- let Inst{25} = 0;
- let Inst{24-21} = opcod;
- let Inst{20} = 1; // The S bit.
- let Inst{15} = 0;
- }
+ opc, ".w\t$Rd, $Rn, $imm",
+ [(set rGPR:$Rd, CPSR, (opnode t2_so_imm:$imm, rGPR:$Rn))]>;
// shifted register
- def rs : T2TwoRegShiftedReg<
+ def rs : T2sTwoRegShiftedReg<
(outs rGPR:$Rd), (ins rGPR:$Rn, t2_so_reg:$ShiftedRm),
- IIC_iALUsi, !strconcat(opc, "s"), "\t$Rd, $Rn, $ShiftedRm",
- [(set rGPR:$Rd, (opnode t2_so_reg:$ShiftedRm, rGPR:$Rn))]> {
- let Inst{31-27} = 0b11101;
- let Inst{26-25} = 0b01;
- let Inst{24-21} = opcod;
- let Inst{20} = 1; // The S bit.
- }
+ IIC_iALUsi, opc, "\t$Rd, $Rn, $ShiftedRm",
+ [(set rGPR:$Rd, CPSR, (opnode t2_so_reg:$ShiftedRm, rGPR:$Rn))]>;
}
}
/// T2I_sh_ir - Defines a set of (op reg, {so_imm|r}) patterns for a shift /
// rotate operation that produces a value.
-multiclass T2I_sh_ir<bits<2> opcod, string opc, PatFrag opnode> {
+multiclass T2I_sh_ir<bits<2> opcod, string opc, Operand ty, PatFrag opnode,
+ string baseOpc> {
// 5-bit imm
def ri : T2sTwoRegShiftImm<
- (outs rGPR:$Rd), (ins rGPR:$Rm, i32imm:$imm), IIC_iMOVsi,
+ (outs rGPR:$Rd), (ins rGPR:$Rm, ty:$imm), IIC_iMOVsi,
opc, ".w\t$Rd, $Rm, $imm",
- [(set rGPR:$Rd, (opnode rGPR:$Rm, imm1_31:$imm))]> {
+ [(set rGPR:$Rd, (opnode rGPR:$Rm, (i32 ty:$imm)))]> {
let Inst{31-27} = 0b11101;
let Inst{26-21} = 0b010010;
let Inst{19-16} = 0b1111; // Rn
@@ -784,20 +780,50 @@ multiclass T2I_sh_ir<bits<2> opcod, string opc, PatFrag opnode> {
let Inst{15-12} = 0b1111;
let Inst{7-4} = 0b0000;
}
+
+ // Optional destination register
+ def : t2InstAlias<!strconcat(opc, "${s}${p}", ".w $Rdn, $imm"),
+ (!cast<Instruction>(!strconcat(baseOpc, "ri")) rGPR:$Rdn, rGPR:$Rdn,
+ ty:$imm, pred:$p,
+ cc_out:$s)>;
+ def : t2InstAlias<!strconcat(opc, "${s}${p}", ".w $Rdn, $Rm"),
+ (!cast<Instruction>(!strconcat(baseOpc, "rr")) rGPR:$Rdn, rGPR:$Rdn,
+ rGPR:$Rm, pred:$p,
+ cc_out:$s)>;
+
+ // Assembler aliases w/o the ".w" suffix.
+ def : t2InstAlias<!strconcat(opc, "${s}${p}", " $Rd, $Rn, $imm"),
+ (!cast<Instruction>(!strconcat(baseOpc, "ri")) rGPR:$Rd, rGPR:$Rn,
+ ty:$imm, pred:$p,
+ cc_out:$s)>;
+ def : t2InstAlias<!strconcat(opc, "${s}${p}", " $Rd, $Rn, $Rm"),
+ (!cast<Instruction>(!strconcat(baseOpc, "rr")) rGPR:$Rd, rGPR:$Rn,
+ rGPR:$Rm, pred:$p,
+ cc_out:$s)>;
+
+ // and with the optional destination operand, too.
+ def : t2InstAlias<!strconcat(opc, "${s}${p}", " $Rdn, $imm"),
+ (!cast<Instruction>(!strconcat(baseOpc, "ri")) rGPR:$Rdn, rGPR:$Rdn,
+ ty:$imm, pred:$p,
+ cc_out:$s)>;
+ def : t2InstAlias<!strconcat(opc, "${s}${p}", " $Rdn, $Rm"),
+ (!cast<Instruction>(!strconcat(baseOpc, "rr")) rGPR:$Rdn, rGPR:$Rdn,
+ rGPR:$Rm, pred:$p,
+ cc_out:$s)>;
}
/// T2I_cmp_irs - Defines a set of (op r, {so_imm|r|so_reg}) cmp / test
/// patterns. Similar to T2I_bin_irs except the instruction does not produce
/// a explicit result, only implicitly set CPSR.
-let isCompare = 1, Defs = [CPSR] in {
multiclass T2I_cmp_irs<bits<4> opcod, string opc,
InstrItinClass iii, InstrItinClass iir, InstrItinClass iis,
- PatFrag opnode> {
+ PatFrag opnode, string baseOpc> {
+let isCompare = 1, Defs = [CPSR] in {
// shifted imm
def ri : T2OneRegCmpImm<
- (outs), (ins GPR:$Rn, t2_so_imm:$imm), iii,
+ (outs), (ins GPRnopc:$Rn, t2_so_imm:$imm), iii,
opc, ".w\t$Rn, $imm",
- [(opnode GPR:$Rn, t2_so_imm:$imm)]> {
+ [(opnode GPRnopc:$Rn, t2_so_imm:$imm)]> {
let Inst{31-27} = 0b11110;
let Inst{25} = 0;
let Inst{24-21} = opcod;
@@ -807,9 +833,9 @@ multiclass T2I_cmp_irs<bits<4> opcod, string opc,
}
// register
def rr : T2TwoRegCmp<
- (outs), (ins GPR:$lhs, rGPR:$rhs), iir,
- opc, ".w\t$lhs, $rhs",
- [(opnode GPR:$lhs, rGPR:$rhs)]> {
+ (outs), (ins GPRnopc:$Rn, rGPR:$Rm), iir,
+ opc, ".w\t$Rn, $Rm",
+ [(opnode GPRnopc:$Rn, rGPR:$Rm)]> {
let Inst{31-27} = 0b11101;
let Inst{26-25} = 0b01;
let Inst{24-21} = opcod;
@@ -821,9 +847,9 @@ multiclass T2I_cmp_irs<bits<4> opcod, string opc,
}
// shifted register
def rs : T2OneRegCmpShiftedReg<
- (outs), (ins GPR:$Rn, t2_so_reg:$ShiftedRm), iis,
+ (outs), (ins GPRnopc:$Rn, t2_so_reg:$ShiftedRm), iis,
opc, ".w\t$Rn, $ShiftedRm",
- [(opnode GPR:$Rn, t2_so_reg:$ShiftedRm)]> {
+ [(opnode GPRnopc:$Rn, t2_so_reg:$ShiftedRm)]> {
let Inst{31-27} = 0b11101;
let Inst{26-25} = 0b01;
let Inst{24-21} = opcod;
@@ -831,55 +857,60 @@ multiclass T2I_cmp_irs<bits<4> opcod, string opc,
let Inst{11-8} = 0b1111; // Rd
}
}
+
+ // Assembler aliases w/o the ".w" suffix.
+ // No alias here for 'rr' version as not all instantiations of this
+ // multiclass want one (CMP in particular, does not).
+ def : t2InstAlias<!strconcat(opc, "${p}", " $Rn, $imm"),
+ (!cast<Instruction>(!strconcat(baseOpc, "ri")) GPRnopc:$Rn,
+ t2_so_imm:$imm, pred:$p)>;
+ def : t2InstAlias<!strconcat(opc, "${p}", " $Rn, $shift"),
+ (!cast<Instruction>(!strconcat(baseOpc, "rs")) GPRnopc:$Rn,
+ t2_so_reg:$shift,
+ pred:$p)>;
}
/// T2I_ld - Defines a set of (op r, {imm12|imm8|so_reg}) load patterns.
multiclass T2I_ld<bit signed, bits<2> opcod, string opc,
- InstrItinClass iii, InstrItinClass iis, PatFrag opnode> {
- def i12 : T2Ii12<(outs GPR:$Rt), (ins t2addrmode_imm12:$addr), iii,
+ InstrItinClass iii, InstrItinClass iis, RegisterClass target,
+ PatFrag opnode> {
+ def i12 : T2Ii12<(outs target:$Rt), (ins t2addrmode_imm12:$addr), iii,
opc, ".w\t$Rt, $addr",
- [(set GPR:$Rt, (opnode t2addrmode_imm12:$addr))]> {
- let Inst{31-27} = 0b11111;
- let Inst{26-25} = 0b00;
+ [(set target:$Rt, (opnode t2addrmode_imm12:$addr))]> {
+ bits<4> Rt;
+ bits<17> addr;
+ let Inst{31-25} = 0b1111100;
let Inst{24} = signed;
let Inst{23} = 1;
let Inst{22-21} = opcod;
let Inst{20} = 1; // load
-
- bits<4> Rt;
- let Inst{15-12} = Rt;
-
- bits<17> addr;
- let addr{12} = 1; // add = TRUE
let Inst{19-16} = addr{16-13}; // Rn
- let Inst{23} = addr{12}; // U
+ let Inst{15-12} = Rt;
let Inst{11-0} = addr{11-0}; // imm
}
- def i8 : T2Ii8 <(outs GPR:$Rt), (ins t2addrmode_imm8:$addr), iii,
+ def i8 : T2Ii8 <(outs target:$Rt), (ins t2addrmode_negimm8:$addr), iii,
opc, "\t$Rt, $addr",
- [(set GPR:$Rt, (opnode t2addrmode_imm8:$addr))]> {
+ [(set target:$Rt, (opnode t2addrmode_negimm8:$addr))]> {
+ bits<4> Rt;
+ bits<13> addr;
let Inst{31-27} = 0b11111;
let Inst{26-25} = 0b00;
let Inst{24} = signed;
let Inst{23} = 0;
let Inst{22-21} = opcod;
let Inst{20} = 1; // load
+ let Inst{19-16} = addr{12-9}; // Rn
+ let Inst{15-12} = Rt;
let Inst{11} = 1;
// Offset: index==TRUE, wback==FALSE
let Inst{10} = 1; // The P bit.
- let Inst{8} = 0; // The W bit.
-
- bits<4> Rt;
- let Inst{15-12} = Rt;
-
- bits<13> addr;
- let Inst{19-16} = addr{12-9}; // Rn
let Inst{9} = addr{8}; // U
+ let Inst{8} = 0; // The W bit.
let Inst{7-0} = addr{7-0}; // imm
}
- def s : T2Iso <(outs GPR:$Rt), (ins t2addrmode_so_reg:$addr), iis,
+ def s : T2Iso <(outs target:$Rt), (ins t2addrmode_so_reg:$addr), iis,
opc, ".w\t$Rt, $addr",
- [(set GPR:$Rt, (opnode t2addrmode_so_reg:$addr))]> {
+ [(set target:$Rt, (opnode t2addrmode_so_reg:$addr))]> {
let Inst{31-27} = 0b11111;
let Inst{26-25} = 0b00;
let Inst{24} = signed;
@@ -895,12 +926,14 @@ multiclass T2I_ld<bit signed, bits<2> opcod, string opc,
let Inst{19-16} = addr{9-6}; // Rn
let Inst{3-0} = addr{5-2}; // Rm
let Inst{5-4} = addr{1-0}; // imm
+
+ let DecoderMethod = "DecodeT2LoadShift";
}
// FIXME: Is the pci variant actually needed?
- def pci : T2Ipc <(outs GPR:$Rt), (ins t2ldrlabel:$addr), iii,
+ def pci : T2Ipc <(outs target:$Rt), (ins t2ldrlabel:$addr), iii,
opc, ".w\t$Rt, $addr",
- [(set GPR:$Rt, (opnode (ARMWrapper tconstpool:$addr)))]> {
+ [(set target:$Rt, (opnode (ARMWrapper tconstpool:$addr)))]> {
let isReMaterializable = 1;
let Inst{31-27} = 0b11111;
let Inst{26-25} = 0b00;
@@ -918,10 +951,11 @@ multiclass T2I_ld<bit signed, bits<2> opcod, string opc,
/// T2I_st - Defines a set of (op r, {imm12|imm8|so_reg}) store patterns.
multiclass T2I_st<bits<2> opcod, string opc,
- InstrItinClass iii, InstrItinClass iis, PatFrag opnode> {
- def i12 : T2Ii12<(outs), (ins GPR:$Rt, t2addrmode_imm12:$addr), iii,
+ InstrItinClass iii, InstrItinClass iis, RegisterClass target,
+ PatFrag opnode> {
+ def i12 : T2Ii12<(outs), (ins target:$Rt, t2addrmode_imm12:$addr), iii,
opc, ".w\t$Rt, $addr",
- [(opnode GPR:$Rt, t2addrmode_imm12:$addr)]> {
+ [(opnode target:$Rt, t2addrmode_imm12:$addr)]> {
let Inst{31-27} = 0b11111;
let Inst{26-23} = 0b0001;
let Inst{22-21} = opcod;
@@ -936,9 +970,9 @@ multiclass T2I_st<bits<2> opcod, string opc,
let Inst{23} = addr{12}; // U
let Inst{11-0} = addr{11-0}; // imm
}
- def i8 : T2Ii8 <(outs), (ins GPR:$Rt, t2addrmode_imm8:$addr), iii,
+ def i8 : T2Ii8 <(outs), (ins target:$Rt, t2addrmode_negimm8:$addr), iii,
opc, "\t$Rt, $addr",
- [(opnode GPR:$Rt, t2addrmode_imm8:$addr)]> {
+ [(opnode target:$Rt, t2addrmode_negimm8:$addr)]> {
let Inst{31-27} = 0b11111;
let Inst{26-23} = 0b0000;
let Inst{22-21} = opcod;
@@ -956,9 +990,9 @@ multiclass T2I_st<bits<2> opcod, string opc,
let Inst{9} = addr{8}; // U
let Inst{7-0} = addr{7-0}; // imm
}
- def s : T2Iso <(outs), (ins GPR:$Rt, t2addrmode_so_reg:$addr), iis,
+ def s : T2Iso <(outs), (ins target:$Rt, t2addrmode_so_reg:$addr), iis,
opc, ".w\t$Rt, $addr",
- [(opnode GPR:$Rt, t2addrmode_so_reg:$addr)]> {
+ [(opnode target:$Rt, t2addrmode_so_reg:$addr)]> {
let Inst{31-27} = 0b11111;
let Inst{26-23} = 0b0000;
let Inst{22-21} = opcod;
@@ -977,146 +1011,81 @@ multiclass T2I_st<bits<2> opcod, string opc,
/// T2I_ext_rrot - A unary operation with two forms: one whose operand is a
/// register and one whose operand is a register rotated by 8/16/24.
-multiclass T2I_ext_rrot<bits<3> opcod, string opc, PatFrag opnode> {
- def r : T2TwoReg<(outs rGPR:$Rd), (ins rGPR:$Rm), IIC_iEXTr,
- opc, ".w\t$Rd, $Rm",
- [(set rGPR:$Rd, (opnode rGPR:$Rm))]> {
- let Inst{31-27} = 0b11111;
- let Inst{26-23} = 0b0100;
- let Inst{22-20} = opcod;
- let Inst{19-16} = 0b1111; // Rn
- let Inst{15-12} = 0b1111;
- let Inst{7} = 1;
- let Inst{5-4} = 0b00; // rotate
- }
- def r_rot : T2TwoReg<(outs rGPR:$Rd), (ins rGPR:$Rm, rot_imm:$rot), IIC_iEXTr,
- opc, ".w\t$Rd, $Rm, ror $rot",
- [(set rGPR:$Rd, (opnode (rotr rGPR:$Rm, rot_imm:$rot)))]> {
- let Inst{31-27} = 0b11111;
- let Inst{26-23} = 0b0100;
- let Inst{22-20} = opcod;
- let Inst{19-16} = 0b1111; // Rn
- let Inst{15-12} = 0b1111;
- let Inst{7} = 1;
-
- bits<2> rot;
- let Inst{5-4} = rot{1-0}; // rotate
- }
+class T2I_ext_rrot<bits<3> opcod, string opc, PatFrag opnode>
+ : T2TwoReg<(outs rGPR:$Rd), (ins rGPR:$Rm, rot_imm:$rot), IIC_iEXTr,
+ opc, ".w\t$Rd, $Rm$rot",
+ [(set rGPR:$Rd, (opnode (rotr rGPR:$Rm, rot_imm:$rot)))]>,
+ Requires<[IsThumb2]> {
+ let Inst{31-27} = 0b11111;
+ let Inst{26-23} = 0b0100;
+ let Inst{22-20} = opcod;
+ let Inst{19-16} = 0b1111; // Rn
+ let Inst{15-12} = 0b1111;
+ let Inst{7} = 1;
+
+ bits<2> rot;
+ let Inst{5-4} = rot{1-0}; // rotate
}
// UXTB16 - Requres T2ExtractPack, does not need the .w qualifier.
-multiclass T2I_ext_rrot_uxtb16<bits<3> opcod, string opc, PatFrag opnode> {
- def r : T2TwoReg<(outs rGPR:$Rd), (ins rGPR:$Rm), IIC_iEXTr,
- opc, "\t$Rd, $Rm",
- [(set rGPR:$Rd, (opnode rGPR:$Rm))]>,
- Requires<[HasT2ExtractPack, IsThumb2]> {
- let Inst{31-27} = 0b11111;
- let Inst{26-23} = 0b0100;
- let Inst{22-20} = opcod;
- let Inst{19-16} = 0b1111; // Rn
- let Inst{15-12} = 0b1111;
- let Inst{7} = 1;
- let Inst{5-4} = 0b00; // rotate
- }
- def r_rot : T2TwoReg<(outs rGPR:$dst), (ins rGPR:$Rm, rot_imm:$rot),
- IIC_iEXTr, opc, "\t$dst, $Rm, ror $rot",
- [(set rGPR:$dst, (opnode (rotr rGPR:$Rm, rot_imm:$rot)))]>,
- Requires<[HasT2ExtractPack, IsThumb2]> {
- let Inst{31-27} = 0b11111;
- let Inst{26-23} = 0b0100;
- let Inst{22-20} = opcod;
- let Inst{19-16} = 0b1111; // Rn
- let Inst{15-12} = 0b1111;
- let Inst{7} = 1;
-
- bits<2> rot;
- let Inst{5-4} = rot{1-0}; // rotate
- }
+class T2I_ext_rrot_uxtb16<bits<3> opcod, string opc, PatFrag opnode>
+ : T2TwoReg<(outs rGPR:$Rd), (ins rGPR:$Rm, rot_imm:$rot),
+ IIC_iEXTr, opc, "\t$Rd, $Rm$rot",
+ [(set rGPR:$Rd, (opnode (rotr rGPR:$Rm, rot_imm:$rot)))]>,
+ Requires<[HasT2ExtractPack, IsThumb2]> {
+ bits<2> rot;
+ let Inst{31-27} = 0b11111;
+ let Inst{26-23} = 0b0100;
+ let Inst{22-20} = opcod;
+ let Inst{19-16} = 0b1111; // Rn
+ let Inst{15-12} = 0b1111;
+ let Inst{7} = 1;
+ let Inst{5-4} = rot;
}
// SXTB16 - Requres T2ExtractPack, does not need the .w qualifier, no pattern
// supported yet.
-multiclass T2I_ext_rrot_sxtb16<bits<3> opcod, string opc> {
- def r : T2TwoReg<(outs rGPR:$Rd), (ins rGPR:$Rm), IIC_iEXTr,
- opc, "\t$Rd, $Rm", []>,
+class T2I_ext_rrot_sxtb16<bits<3> opcod, string opc>
+ : T2TwoReg<(outs rGPR:$Rd), (ins rGPR:$Rm, rot_imm:$rot), IIC_iEXTr,
+ opc, "\t$Rd, $Rm$rot", []>,
Requires<[IsThumb2, HasT2ExtractPack]> {
- let Inst{31-27} = 0b11111;
- let Inst{26-23} = 0b0100;
- let Inst{22-20} = opcod;
- let Inst{19-16} = 0b1111; // Rn
- let Inst{15-12} = 0b1111;
- let Inst{7} = 1;
- let Inst{5-4} = 0b00; // rotate
- }
- def r_rot : T2TwoReg<(outs rGPR:$Rd), (ins rGPR:$Rm, i32imm:$rot), IIC_iEXTr,
- opc, "\t$Rd, $Rm, ror $rot", []>,
- Requires<[IsThumb2, HasT2ExtractPack]> {
- let Inst{31-27} = 0b11111;
- let Inst{26-23} = 0b0100;
- let Inst{22-20} = opcod;
- let Inst{19-16} = 0b1111; // Rn
- let Inst{15-12} = 0b1111;
- let Inst{7} = 1;
-
- bits<2> rot;
- let Inst{5-4} = rot{1-0}; // rotate
- }
+ bits<2> rot;
+ let Inst{31-27} = 0b11111;
+ let Inst{26-23} = 0b0100;
+ let Inst{22-20} = opcod;
+ let Inst{19-16} = 0b1111; // Rn
+ let Inst{15-12} = 0b1111;
+ let Inst{7} = 1;
+ let Inst{5-4} = rot;
}
/// T2I_exta_rrot - A binary operation with two forms: one whose operand is a
/// register and one whose operand is a register rotated by 8/16/24.
-multiclass T2I_exta_rrot<bits<3> opcod, string opc, PatFrag opnode> {
- def rr : T2ThreeReg<(outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm), IIC_iEXTAr,
- opc, "\t$Rd, $Rn, $Rm",
- [(set rGPR:$Rd, (opnode rGPR:$Rn, rGPR:$Rm))]>,
- Requires<[HasT2ExtractPack, IsThumb2]> {
- let Inst{31-27} = 0b11111;
- let Inst{26-23} = 0b0100;
- let Inst{22-20} = opcod;
- let Inst{15-12} = 0b1111;
- let Inst{7} = 1;
- let Inst{5-4} = 0b00; // rotate
- }
- def rr_rot : T2ThreeReg<(outs rGPR:$Rd),
- (ins rGPR:$Rn, rGPR:$Rm, rot_imm:$rot),
- IIC_iEXTAsr, opc, "\t$Rd, $Rn, $Rm, ror $rot",
- [(set rGPR:$Rd, (opnode rGPR:$Rn,
- (rotr rGPR:$Rm, rot_imm:$rot)))]>,
- Requires<[HasT2ExtractPack, IsThumb2]> {
- let Inst{31-27} = 0b11111;
- let Inst{26-23} = 0b0100;
- let Inst{22-20} = opcod;
- let Inst{15-12} = 0b1111;
- let Inst{7} = 1;
-
- bits<2> rot;
- let Inst{5-4} = rot{1-0}; // rotate
- }
+class T2I_exta_rrot<bits<3> opcod, string opc, PatFrag opnode>
+ : T2ThreeReg<(outs rGPR:$Rd),
+ (ins rGPR:$Rn, rGPR:$Rm, rot_imm:$rot),
+ IIC_iEXTAsr, opc, "\t$Rd, $Rn, $Rm$rot",
+ [(set rGPR:$Rd, (opnode rGPR:$Rn, (rotr rGPR:$Rm,rot_imm:$rot)))]>,
+ Requires<[HasT2ExtractPack, IsThumb2]> {
+ bits<2> rot;
+ let Inst{31-27} = 0b11111;
+ let Inst{26-23} = 0b0100;
+ let Inst{22-20} = opcod;
+ let Inst{15-12} = 0b1111;
+ let Inst{7} = 1;
+ let Inst{5-4} = rot;
}
-// DO variant - disassembly only, no pattern
-
-multiclass T2I_exta_rrot_DO<bits<3> opcod, string opc> {
- def rr : T2ThreeReg<(outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm), IIC_iEXTAr,
- opc, "\t$Rd, $Rn, $Rm", []> {
- let Inst{31-27} = 0b11111;
- let Inst{26-23} = 0b0100;
- let Inst{22-20} = opcod;
- let Inst{15-12} = 0b1111;
- let Inst{7} = 1;
- let Inst{5-4} = 0b00; // rotate
- }
- def rr_rot :T2ThreeReg<(outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm, i32imm:$rot),
- IIC_iEXTAsr, opc, "\t$Rd, $Rn, $Rm, ror $rot", []> {
- let Inst{31-27} = 0b11111;
- let Inst{26-23} = 0b0100;
- let Inst{22-20} = opcod;
- let Inst{15-12} = 0b1111;
- let Inst{7} = 1;
-
- bits<2> rot;
- let Inst{5-4} = rot{1-0}; // rotate
- }
+class T2I_exta_rrot_np<bits<3> opcod, string opc>
+ : T2ThreeReg<(outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm,rot_imm:$rot),
+ IIC_iEXTAsr, opc, "\t$Rd, $Rn, $Rm$rot", []> {
+ bits<2> rot;
+ let Inst{31-27} = 0b11111;
+ let Inst{26-23} = 0b0100;
+ let Inst{22-20} = opcod;
+ let Inst{15-12} = 0b1111;
+ let Inst{7} = 1;
+ let Inst{5-4} = rot;
}
//===----------------------------------------------------------------------===//
@@ -1143,7 +1112,7 @@ class T2PCOneRegImm<dag oops, dag iops, InstrItinClass itin,
// assembler.
def t2ADR : T2PCOneRegImm<(outs rGPR:$Rd),
(ins t2adrlabel:$addr, pred:$p),
- IIC_iALUi, "adr{$p}.w\t$Rd, #$addr", []> {
+ IIC_iALUi, "adr{$p}.w\t$Rd, $addr", []> {
let Inst{31-27} = 0b11110;
let Inst{25-24} = 0b10;
// Inst{23:21} = '11' (add = FALSE) or '00' (add = TRUE)
@@ -1160,6 +1129,8 @@ def t2ADR : T2PCOneRegImm<(outs rGPR:$Rd),
let Inst{26} = addr{11};
let Inst{14-12} = addr{10-8};
let Inst{7-0} = addr{7-0};
+
+ let DecoderMethod = "DecodeT2Adr";
}
let neverHasSideEffects = 1, isReMaterializable = 1 in
@@ -1177,33 +1148,33 @@ def t2LEApcrelJT : t2PseudoInst<(outs rGPR:$Rd),
// Load
let canFoldAsLoad = 1, isReMaterializable = 1 in
-defm t2LDR : T2I_ld<0, 0b10, "ldr", IIC_iLoad_i, IIC_iLoad_si,
+defm t2LDR : T2I_ld<0, 0b10, "ldr", IIC_iLoad_i, IIC_iLoad_si, GPR,
UnOpFrag<(load node:$Src)>>;
// Loads with zero extension
defm t2LDRH : T2I_ld<0, 0b01, "ldrh", IIC_iLoad_bh_i, IIC_iLoad_bh_si,
- UnOpFrag<(zextloadi16 node:$Src)>>;
+ rGPR, UnOpFrag<(zextloadi16 node:$Src)>>;
defm t2LDRB : T2I_ld<0, 0b00, "ldrb", IIC_iLoad_bh_i, IIC_iLoad_bh_si,
- UnOpFrag<(zextloadi8 node:$Src)>>;
+ rGPR, UnOpFrag<(zextloadi8 node:$Src)>>;
// Loads with sign extension
defm t2LDRSH : T2I_ld<1, 0b01, "ldrsh", IIC_iLoad_bh_i, IIC_iLoad_bh_si,
- UnOpFrag<(sextloadi16 node:$Src)>>;
+ rGPR, UnOpFrag<(sextloadi16 node:$Src)>>;
defm t2LDRSB : T2I_ld<1, 0b00, "ldrsb", IIC_iLoad_bh_i, IIC_iLoad_bh_si,
- UnOpFrag<(sextloadi8 node:$Src)>>;
+ rGPR, UnOpFrag<(sextloadi8 node:$Src)>>;
let mayLoad = 1, neverHasSideEffects = 1, hasExtraDefRegAllocReq = 1 in {
// Load doubleword
def t2LDRDi8 : T2Ii8s4<1, 0, 1, (outs rGPR:$Rt, rGPR:$Rt2),
(ins t2addrmode_imm8s4:$addr),
- IIC_iLoad_d_i, "ldrd", "\t$Rt, $Rt2, $addr", []>;
+ IIC_iLoad_d_i, "ldrd", "\t$Rt, $Rt2, $addr", "", []>;
} // mayLoad = 1, neverHasSideEffects = 1, hasExtraDefRegAllocReq = 1
// zextload i1 -> zextload i8
def : T2Pat<(zextloadi1 t2addrmode_imm12:$addr),
(t2LDRBi12 t2addrmode_imm12:$addr)>;
-def : T2Pat<(zextloadi1 t2addrmode_imm8:$addr),
- (t2LDRBi8 t2addrmode_imm8:$addr)>;
+def : T2Pat<(zextloadi1 t2addrmode_negimm8:$addr),
+ (t2LDRBi8 t2addrmode_negimm8:$addr)>;
def : T2Pat<(zextloadi1 t2addrmode_so_reg:$addr),
(t2LDRBs t2addrmode_so_reg:$addr)>;
def : T2Pat<(zextloadi1 (ARMWrapper tconstpool:$addr)),
@@ -1214,8 +1185,8 @@ def : T2Pat<(zextloadi1 (ARMWrapper tconstpool:$addr)),
// earlier?
def : T2Pat<(extloadi1 t2addrmode_imm12:$addr),
(t2LDRBi12 t2addrmode_imm12:$addr)>;
-def : T2Pat<(extloadi1 t2addrmode_imm8:$addr),
- (t2LDRBi8 t2addrmode_imm8:$addr)>;
+def : T2Pat<(extloadi1 t2addrmode_negimm8:$addr),
+ (t2LDRBi8 t2addrmode_negimm8:$addr)>;
def : T2Pat<(extloadi1 t2addrmode_so_reg:$addr),
(t2LDRBs t2addrmode_so_reg:$addr)>;
def : T2Pat<(extloadi1 (ARMWrapper tconstpool:$addr)),
@@ -1223,8 +1194,8 @@ def : T2Pat<(extloadi1 (ARMWrapper tconstpool:$addr)),
def : T2Pat<(extloadi8 t2addrmode_imm12:$addr),
(t2LDRBi12 t2addrmode_imm12:$addr)>;
-def : T2Pat<(extloadi8 t2addrmode_imm8:$addr),
- (t2LDRBi8 t2addrmode_imm8:$addr)>;
+def : T2Pat<(extloadi8 t2addrmode_negimm8:$addr),
+ (t2LDRBi8 t2addrmode_negimm8:$addr)>;
def : T2Pat<(extloadi8 t2addrmode_so_reg:$addr),
(t2LDRBs t2addrmode_so_reg:$addr)>;
def : T2Pat<(extloadi8 (ARMWrapper tconstpool:$addr)),
@@ -1232,8 +1203,8 @@ def : T2Pat<(extloadi8 (ARMWrapper tconstpool:$addr)),
def : T2Pat<(extloadi16 t2addrmode_imm12:$addr),
(t2LDRHi12 t2addrmode_imm12:$addr)>;
-def : T2Pat<(extloadi16 t2addrmode_imm8:$addr),
- (t2LDRHi8 t2addrmode_imm8:$addr)>;
+def : T2Pat<(extloadi16 t2addrmode_negimm8:$addr),
+ (t2LDRHi8 t2addrmode_negimm8:$addr)>;
def : T2Pat<(extloadi16 t2addrmode_so_reg:$addr),
(t2LDRHs t2addrmode_so_reg:$addr)>;
def : T2Pat<(extloadi16 (ARMWrapper tconstpool:$addr)),
@@ -1247,83 +1218,86 @@ def : T2Pat<(extloadi16 (ARMWrapper tconstpool:$addr)),
// Indexed loads
let mayLoad = 1, neverHasSideEffects = 1 in {
-def t2LDR_PRE : T2Iidxldst<0, 0b10, 1, 1, (outs GPR:$Rt, GPR:$Rn),
+def t2LDR_PRE : T2Ipreldst<0, 0b10, 1, 1, (outs GPR:$Rt, GPR:$Rn_wb),
(ins t2addrmode_imm8:$addr),
AddrModeT2_i8, IndexModePre, IIC_iLoad_iu,
- "ldr", "\t$Rt, $addr!", "$addr.base = $Rn",
- []>;
+ "ldr", "\t$Rt, $addr!", "$addr.base = $Rn_wb",
+ []> {
+ let AsmMatchConverter = "cvtLdWriteBackRegT2AddrModeImm8";
+}
-def t2LDR_POST : T2Iidxldst<0, 0b10, 1, 0, (outs GPR:$Rt, GPR:$Rn),
- (ins GPR:$base, t2am_imm8_offset:$addr),
- AddrModeT2_i8, IndexModePost, IIC_iLoad_iu,
- "ldr", "\t$Rt, [$Rn], $addr", "$base = $Rn",
- []>;
+def t2LDR_POST : T2Ipostldst<0, 0b10, 1, 0, (outs GPR:$Rt, GPR:$Rn_wb),
+ (ins addr_offset_none:$Rn, t2am_imm8_offset:$offset),
+ AddrModeT2_i8, IndexModePost, IIC_iLoad_iu,
+ "ldr", "\t$Rt, $Rn$offset", "$Rn = $Rn_wb", []>;
-def t2LDRB_PRE : T2Iidxldst<0, 0b00, 1, 1, (outs GPR:$Rt, GPR:$Rn),
+def t2LDRB_PRE : T2Ipreldst<0, 0b00, 1, 1, (outs GPR:$Rt, GPR:$Rn_wb),
(ins t2addrmode_imm8:$addr),
AddrModeT2_i8, IndexModePre, IIC_iLoad_bh_iu,
- "ldrb", "\t$Rt, $addr!", "$addr.base = $Rn",
- []>;
-def t2LDRB_POST : T2Iidxldst<0, 0b00, 1, 0, (outs GPR:$Rt, GPR:$Rn),
- (ins GPR:$base, t2am_imm8_offset:$addr),
- AddrModeT2_i8, IndexModePost, IIC_iLoad_bh_iu,
- "ldrb", "\t$Rt, [$Rn], $addr", "$base = $Rn",
- []>;
-
-def t2LDRH_PRE : T2Iidxldst<0, 0b01, 1, 1, (outs GPR:$Rt, GPR:$Rn),
+ "ldrb", "\t$Rt, $addr!", "$addr.base = $Rn_wb",
+ []> {
+ let AsmMatchConverter = "cvtLdWriteBackRegT2AddrModeImm8";
+}
+def t2LDRB_POST : T2Ipostldst<0, 0b00, 1, 0, (outs GPR:$Rt, GPR:$Rn_wb),
+ (ins addr_offset_none:$Rn, t2am_imm8_offset:$offset),
+ AddrModeT2_i8, IndexModePost, IIC_iLoad_bh_iu,
+ "ldrb", "\t$Rt, $Rn$offset", "$Rn = $Rn_wb", []>;
+
+def t2LDRH_PRE : T2Ipreldst<0, 0b01, 1, 1, (outs GPR:$Rt, GPR:$Rn_wb),
(ins t2addrmode_imm8:$addr),
AddrModeT2_i8, IndexModePre, IIC_iLoad_bh_iu,
- "ldrh", "\t$Rt, $addr!", "$addr.base = $Rn",
- []>;
-def t2LDRH_POST : T2Iidxldst<0, 0b01, 1, 0, (outs GPR:$Rt, GPR:$Rn),
- (ins GPR:$base, t2am_imm8_offset:$addr),
- AddrModeT2_i8, IndexModePost, IIC_iLoad_bh_iu,
- "ldrh", "\t$Rt, [$Rn], $addr", "$base = $Rn",
- []>;
-
-def t2LDRSB_PRE : T2Iidxldst<1, 0b00, 1, 1, (outs GPR:$Rt, GPR:$Rn),
+ "ldrh", "\t$Rt, $addr!", "$addr.base = $Rn_wb",
+ []> {
+ let AsmMatchConverter = "cvtLdWriteBackRegT2AddrModeImm8";
+}
+def t2LDRH_POST : T2Ipostldst<0, 0b01, 1, 0, (outs GPR:$Rt, GPR:$Rn_wb),
+ (ins addr_offset_none:$Rn, t2am_imm8_offset:$offset),
+ AddrModeT2_i8, IndexModePost, IIC_iLoad_bh_iu,
+ "ldrh", "\t$Rt, $Rn$offset", "$Rn = $Rn_wb", []>;
+
+def t2LDRSB_PRE : T2Ipreldst<1, 0b00, 1, 1, (outs GPR:$Rt, GPR:$Rn_wb),
(ins t2addrmode_imm8:$addr),
AddrModeT2_i8, IndexModePre, IIC_iLoad_bh_iu,
- "ldrsb", "\t$Rt, $addr!", "$addr.base = $Rn",
- []>;
-def t2LDRSB_POST : T2Iidxldst<1, 0b00, 1, 0, (outs GPR:$Rt, GPR:$Rn),
- (ins GPR:$base, t2am_imm8_offset:$addr),
- AddrModeT2_i8, IndexModePost, IIC_iLoad_bh_iu,
- "ldrsb", "\t$Rt, [$Rn], $addr", "$base = $Rn",
- []>;
-
-def t2LDRSH_PRE : T2Iidxldst<1, 0b01, 1, 1, (outs GPR:$Rt, GPR:$Rn),
+ "ldrsb", "\t$Rt, $addr!", "$addr.base = $Rn_wb",
+ []> {
+ let AsmMatchConverter = "cvtLdWriteBackRegT2AddrModeImm8";
+}
+def t2LDRSB_POST : T2Ipostldst<1, 0b00, 1, 0, (outs GPR:$Rt, GPR:$Rn_wb),
+ (ins addr_offset_none:$Rn, t2am_imm8_offset:$offset),
+ AddrModeT2_i8, IndexModePost, IIC_iLoad_bh_iu,
+ "ldrsb", "\t$Rt, $Rn$offset", "$Rn = $Rn_wb", []>;
+
+def t2LDRSH_PRE : T2Ipreldst<1, 0b01, 1, 1, (outs GPR:$Rt, GPR:$Rn_wb),
(ins t2addrmode_imm8:$addr),
AddrModeT2_i8, IndexModePre, IIC_iLoad_bh_iu,
- "ldrsh", "\t$Rt, $addr!", "$addr.base = $Rn",
- []>;
-def t2LDRSH_POST : T2Iidxldst<1, 0b01, 1, 0, (outs GPR:$dst, GPR:$Rn),
- (ins GPR:$base, t2am_imm8_offset:$addr),
- AddrModeT2_i8, IndexModePost, IIC_iLoad_bh_iu,
- "ldrsh", "\t$dst, [$Rn], $addr", "$base = $Rn",
- []>;
+ "ldrsh", "\t$Rt, $addr!", "$addr.base = $Rn_wb",
+ []> {
+ let AsmMatchConverter = "cvtLdWriteBackRegT2AddrModeImm8";
+}
+def t2LDRSH_POST : T2Ipostldst<1, 0b01, 1, 0, (outs GPR:$Rt, GPR:$Rn_wb),
+ (ins addr_offset_none:$Rn, t2am_imm8_offset:$offset),
+ AddrModeT2_i8, IndexModePost, IIC_iLoad_bh_iu,
+ "ldrsh", "\t$Rt, $Rn$offset", "$Rn = $Rn_wb", []>;
} // mayLoad = 1, neverHasSideEffects = 1
-// LDRT, LDRBT, LDRHT, LDRSBT, LDRSHT all have offset mode (PUW=0b110) and are
-// for disassembly only.
+// LDRT, LDRBT, LDRHT, LDRSBT, LDRSHT all have offset mode (PUW=0b110).
// Ref: A8.6.57 LDR (immediate, Thumb) Encoding T4
class T2IldT<bit signed, bits<2> type, string opc, InstrItinClass ii>
- : T2Ii8<(outs rGPR:$Rt), (ins t2addrmode_imm8:$addr), ii, opc,
+ : T2Ii8<(outs rGPR:$Rt), (ins t2addrmode_posimm8:$addr), ii, opc,
"\t$Rt, $addr", []> {
+ bits<4> Rt;
+ bits<13> addr;
let Inst{31-27} = 0b11111;
let Inst{26-25} = 0b00;
let Inst{24} = signed;
let Inst{23} = 0;
let Inst{22-21} = type;
let Inst{20} = 1; // load
+ let Inst{19-16} = addr{12-9};
+ let Inst{15-12} = Rt;
let Inst{11} = 1;
let Inst{10-8} = 0b110; // PUW.
-
- bits<4> Rt;
- bits<13> addr;
- let Inst{15-12} = Rt;
- let Inst{19-16} = addr{12-9};
- let Inst{7-0} = addr{7-0};
+ let Inst{7-0} = addr{7-0};
}
def t2LDRT : T2IldT<0, 0b10, "ldrt", IIC_iLoad_i>;
@@ -1333,67 +1307,97 @@ def t2LDRSBT : T2IldT<1, 0b00, "ldrsbt", IIC_iLoad_bh_i>;
def t2LDRSHT : T2IldT<1, 0b01, "ldrsht", IIC_iLoad_bh_i>;
// Store
-defm t2STR :T2I_st<0b10,"str", IIC_iStore_i, IIC_iStore_si,
+defm t2STR :T2I_st<0b10,"str", IIC_iStore_i, IIC_iStore_si, GPR,
BinOpFrag<(store node:$LHS, node:$RHS)>>;
defm t2STRB:T2I_st<0b00,"strb", IIC_iStore_bh_i, IIC_iStore_bh_si,
- BinOpFrag<(truncstorei8 node:$LHS, node:$RHS)>>;
+ rGPR, BinOpFrag<(truncstorei8 node:$LHS, node:$RHS)>>;
defm t2STRH:T2I_st<0b01,"strh", IIC_iStore_bh_i, IIC_iStore_bh_si,
- BinOpFrag<(truncstorei16 node:$LHS, node:$RHS)>>;
+ rGPR, BinOpFrag<(truncstorei16 node:$LHS, node:$RHS)>>;
// Store doubleword
let mayLoad = 1, neverHasSideEffects = 1, hasExtraSrcRegAllocReq = 1 in
def t2STRDi8 : T2Ii8s4<1, 0, 0, (outs),
(ins GPR:$Rt, GPR:$Rt2, t2addrmode_imm8s4:$addr),
- IIC_iStore_d_r, "strd", "\t$Rt, $Rt2, $addr", []>;
+ IIC_iStore_d_r, "strd", "\t$Rt, $Rt2, $addr", "", []>;
// Indexed stores
-def t2STR_PRE : T2Iidxldst<0, 0b10, 0, 1, (outs GPR:$base_wb),
- (ins GPR:$Rt, GPR:$Rn, t2am_imm8_offset:$addr),
+def t2STR_PRE : T2Ipreldst<0, 0b10, 0, 1, (outs GPRnopc:$Rn_wb),
+ (ins rGPR:$Rt, t2addrmode_imm8:$addr),
AddrModeT2_i8, IndexModePre, IIC_iStore_iu,
- "str", "\t$Rt, [$Rn, $addr]!",
- "$Rn = $base_wb,@earlyclobber $base_wb",
- [(set GPR:$base_wb,
- (pre_store GPR:$Rt, GPR:$Rn, t2am_imm8_offset:$addr))]>;
-
-def t2STR_POST : T2Iidxldst<0, 0b10, 0, 0, (outs GPR:$base_wb),
- (ins GPR:$Rt, GPR:$Rn, t2am_imm8_offset:$addr),
- AddrModeT2_i8, IndexModePost, IIC_iStore_iu,
- "str", "\t$Rt, [$Rn], $addr",
- "$Rn = $base_wb,@earlyclobber $base_wb",
- [(set GPR:$base_wb,
- (post_store GPR:$Rt, GPR:$Rn, t2am_imm8_offset:$addr))]>;
-
-def t2STRH_PRE : T2Iidxldst<0, 0b01, 0, 1, (outs GPR:$base_wb),
- (ins GPR:$Rt, GPR:$Rn, t2am_imm8_offset:$addr),
+ "str", "\t$Rt, $addr!",
+ "$addr.base = $Rn_wb,@earlyclobber $Rn_wb", []> {
+ let AsmMatchConverter = "cvtStWriteBackRegT2AddrModeImm8";
+}
+def t2STRH_PRE : T2Ipreldst<0, 0b01, 0, 1, (outs GPRnopc:$Rn_wb),
+ (ins rGPR:$Rt, t2addrmode_imm8:$addr),
AddrModeT2_i8, IndexModePre, IIC_iStore_iu,
- "strh", "\t$Rt, [$Rn, $addr]!",
- "$Rn = $base_wb,@earlyclobber $base_wb",
- [(set GPR:$base_wb,
- (pre_truncsti16 GPR:$Rt, GPR:$Rn, t2am_imm8_offset:$addr))]>;
-
-def t2STRH_POST : T2Iidxldst<0, 0b01, 0, 0, (outs GPR:$base_wb),
- (ins GPR:$Rt, GPR:$Rn, t2am_imm8_offset:$addr),
- AddrModeT2_i8, IndexModePost, IIC_iStore_bh_iu,
- "strh", "\t$Rt, [$Rn], $addr",
- "$Rn = $base_wb,@earlyclobber $base_wb",
- [(set GPR:$base_wb,
- (post_truncsti16 GPR:$Rt, GPR:$Rn, t2am_imm8_offset:$addr))]>;
+ "strh", "\t$Rt, $addr!",
+ "$addr.base = $Rn_wb,@earlyclobber $Rn_wb", []> {
+ let AsmMatchConverter = "cvtStWriteBackRegT2AddrModeImm8";
+}
-def t2STRB_PRE : T2Iidxldst<0, 0b00, 0, 1, (outs GPR:$base_wb),
- (ins GPR:$Rt, GPR:$Rn, t2am_imm8_offset:$addr),
+def t2STRB_PRE : T2Ipreldst<0, 0b00, 0, 1, (outs GPRnopc:$Rn_wb),
+ (ins rGPR:$Rt, t2addrmode_imm8:$addr),
AddrModeT2_i8, IndexModePre, IIC_iStore_bh_iu,
- "strb", "\t$Rt, [$Rn, $addr]!",
- "$Rn = $base_wb,@earlyclobber $base_wb",
- [(set GPR:$base_wb,
- (pre_truncsti8 GPR:$Rt, GPR:$Rn, t2am_imm8_offset:$addr))]>;
+ "strb", "\t$Rt, $addr!",
+ "$addr.base = $Rn_wb,@earlyclobber $Rn_wb", []> {
+ let AsmMatchConverter = "cvtStWriteBackRegT2AddrModeImm8";
+}
-def t2STRB_POST : T2Iidxldst<0, 0b00, 0, 0, (outs GPR:$base_wb),
- (ins GPR:$Rt, GPR:$Rn, t2am_imm8_offset:$addr),
+def t2STR_POST : T2Ipostldst<0, 0b10, 0, 0, (outs GPRnopc:$Rn_wb),
+ (ins rGPR:$Rt, addr_offset_none:$Rn,
+ t2am_imm8_offset:$offset),
+ AddrModeT2_i8, IndexModePost, IIC_iStore_iu,
+ "str", "\t$Rt, $Rn$offset",
+ "$Rn = $Rn_wb,@earlyclobber $Rn_wb",
+ [(set GPRnopc:$Rn_wb,
+ (post_store rGPR:$Rt, addr_offset_none:$Rn,
+ t2am_imm8_offset:$offset))]>;
+
+def t2STRH_POST : T2Ipostldst<0, 0b01, 0, 0, (outs GPRnopc:$Rn_wb),
+ (ins rGPR:$Rt, addr_offset_none:$Rn,
+ t2am_imm8_offset:$offset),
AddrModeT2_i8, IndexModePost, IIC_iStore_bh_iu,
- "strb", "\t$Rt, [$Rn], $addr",
- "$Rn = $base_wb,@earlyclobber $base_wb",
- [(set GPR:$base_wb,
- (post_truncsti8 GPR:$Rt, GPR:$Rn, t2am_imm8_offset:$addr))]>;
+ "strh", "\t$Rt, $Rn$offset",
+ "$Rn = $Rn_wb,@earlyclobber $Rn_wb",
+ [(set GPRnopc:$Rn_wb,
+ (post_truncsti16 rGPR:$Rt, addr_offset_none:$Rn,
+ t2am_imm8_offset:$offset))]>;
+
+def t2STRB_POST : T2Ipostldst<0, 0b00, 0, 0, (outs GPRnopc:$Rn_wb),
+ (ins rGPR:$Rt, addr_offset_none:$Rn,
+ t2am_imm8_offset:$offset),
+ AddrModeT2_i8, IndexModePost, IIC_iStore_bh_iu,
+ "strb", "\t$Rt, $Rn$offset",
+ "$Rn = $Rn_wb,@earlyclobber $Rn_wb",
+ [(set GPRnopc:$Rn_wb,
+ (post_truncsti8 rGPR:$Rt, addr_offset_none:$Rn,
+ t2am_imm8_offset:$offset))]>;
+
+// Pseudo-instructions for pattern matching the pre-indexed stores. We can't
+// put the patterns on the instruction definitions directly as ISel wants
+// the address base and offset to be separate operands, not a single
+// complex operand like we represent the instructions themselves. The
+// pseudos map between the two.
+let usesCustomInserter = 1,
+ Constraints = "$Rn = $Rn_wb,@earlyclobber $Rn_wb" in {
+def t2STR_preidx: t2PseudoInst<(outs GPRnopc:$Rn_wb),
+ (ins rGPR:$Rt, GPRnopc:$Rn, t2am_imm8_offset:$offset, pred:$p),
+ 4, IIC_iStore_ru,
+ [(set GPRnopc:$Rn_wb,
+ (pre_store rGPR:$Rt, GPRnopc:$Rn, t2am_imm8_offset:$offset))]>;
+def t2STRB_preidx: t2PseudoInst<(outs GPRnopc:$Rn_wb),
+ (ins rGPR:$Rt, GPRnopc:$Rn, t2am_imm8_offset:$offset, pred:$p),
+ 4, IIC_iStore_ru,
+ [(set GPRnopc:$Rn_wb,
+ (pre_truncsti8 rGPR:$Rt, GPRnopc:$Rn, t2am_imm8_offset:$offset))]>;
+def t2STRH_preidx: t2PseudoInst<(outs GPRnopc:$Rn_wb),
+ (ins rGPR:$Rt, GPRnopc:$Rn, t2am_imm8_offset:$offset, pred:$p),
+ 4, IIC_iStore_ru,
+ [(set GPRnopc:$Rn_wb,
+ (pre_truncsti16 rGPR:$Rt, GPRnopc:$Rn, t2am_imm8_offset:$offset))]>;
+}
+
// STRT, STRBT, STRHT all have offset mode (PUW=0b110) and are for disassembly
// only.
@@ -1424,21 +1428,31 @@ def t2STRHT : T2IstT<0b01, "strht", IIC_iStore_bh_i>;
// ldrd / strd pre / post variants
// For disassembly only.
-def t2LDRD_PRE : T2Ii8s4<1, 1, 1, (outs rGPR:$Rt, rGPR:$Rt2),
- (ins GPR:$base, t2am_imm8s4_offset:$imm), IIC_iLoad_d_ru,
- "ldrd", "\t$Rt, $Rt2, [$base, $imm]!", []>;
+def t2LDRD_PRE : T2Ii8s4<1, 1, 1, (outs rGPR:$Rt, rGPR:$Rt2, GPR:$wb),
+ (ins t2addrmode_imm8s4:$addr), IIC_iLoad_d_ru,
+ "ldrd", "\t$Rt, $Rt2, $addr!", "$addr.base = $wb", []> {
+ let AsmMatchConverter = "cvtT2LdrdPre";
+ let DecoderMethod = "DecodeT2LDRDPreInstruction";
+}
-def t2LDRD_POST : T2Ii8s4<0, 1, 1, (outs rGPR:$Rt, rGPR:$Rt2),
- (ins GPR:$base, t2am_imm8s4_offset:$imm), IIC_iLoad_d_ru,
- "ldrd", "\t$Rt, $Rt2, [$base], $imm", []>;
+def t2LDRD_POST : T2Ii8s4post<0, 1, 1, (outs rGPR:$Rt, rGPR:$Rt2, GPR:$wb),
+ (ins addr_offset_none:$addr, t2am_imm8s4_offset:$imm),
+ IIC_iLoad_d_ru, "ldrd", "\t$Rt, $Rt2, $addr$imm",
+ "$addr.base = $wb", []>;
-def t2STRD_PRE : T2Ii8s4<1, 1, 0, (outs),
- (ins rGPR:$Rt, rGPR:$Rt2, GPR:$base, t2am_imm8s4_offset:$imm),
- IIC_iStore_d_ru, "strd", "\t$Rt, $Rt2, [$base, $imm]!", []>;
+def t2STRD_PRE : T2Ii8s4<1, 1, 0, (outs GPR:$wb),
+ (ins rGPR:$Rt, rGPR:$Rt2, t2addrmode_imm8s4:$addr),
+ IIC_iStore_d_ru, "strd", "\t$Rt, $Rt2, $addr!",
+ "$addr.base = $wb", []> {
+ let AsmMatchConverter = "cvtT2StrdPre";
+ let DecoderMethod = "DecodeT2STRDPreInstruction";
+}
-def t2STRD_POST : T2Ii8s4<0, 1, 0, (outs),
- (ins rGPR:$Rt, rGPR:$Rt2, GPR:$base, t2am_imm8s4_offset:$imm),
- IIC_iStore_d_ru, "strd", "\t$Rt, $Rt2, [$base], $imm", []>;
+def t2STRD_POST : T2Ii8s4post<0, 1, 0, (outs GPR:$wb),
+ (ins rGPR:$Rt, rGPR:$Rt2, addr_offset_none:$addr,
+ t2am_imm8s4_offset:$imm),
+ IIC_iStore_d_ru, "strd", "\t$Rt, $Rt2, $addr$imm",
+ "$addr.base = $wb", []>;
// T2Ipl (Preload Data/Instruction) signals the memory system of possible future
// data/instruction access. These are for disassembly only.
@@ -1463,9 +1477,9 @@ multiclass T2Ipl<bits<1> write, bits<1> instr, string opc> {
let Inst{11-0} = addr{11-0}; // imm12
}
- def i8 : T2Ii8<(outs), (ins t2addrmode_imm8:$addr), IIC_Preload, opc,
+ def i8 : T2Ii8<(outs), (ins t2addrmode_negimm8:$addr), IIC_Preload, opc,
"\t$addr",
- [(ARMPreload t2addrmode_imm8:$addr, (i32 write), (i32 instr))]> {
+ [(ARMPreload t2addrmode_negimm8:$addr, (i32 write), (i32 instr))]> {
let Inst{31-25} = 0b1111100;
let Inst{24} = instr;
let Inst{23} = 0; // U = 0
@@ -1496,6 +1510,8 @@ multiclass T2Ipl<bits<1> write, bits<1> instr, string opc> {
let Inst{19-16} = addr{9-6}; // Rn
let Inst{3-0} = addr{5-2}; // Rm
let Inst{5-4} = addr{1-0}; // imm2
+
+ let DecoderMethod = "DecodeT2LoadShift";
}
}
@@ -1507,11 +1523,11 @@ defm t2PLI : T2Ipl<0, 1, "pli">, Requires<[IsThumb2,HasV7]>;
// Load / store multiple Instructions.
//
-multiclass thumb2_ldst_mult<string asm, InstrItinClass itin,
+multiclass thumb2_ld_mult<string asm, InstrItinClass itin,
InstrItinClass itin_upd, bit L_bit> {
def IA :
T2XI<(outs), (ins GPR:$Rn, pred:$p, reglist:$regs, variable_ops),
- itin, !strconcat(asm, "ia${p}.w\t$Rn, $regs"), []> {
+ itin, !strconcat(asm, "${p}.w\t$Rn, $regs"), []> {
bits<4> Rn;
bits<16> regs;
@@ -1522,11 +1538,12 @@ multiclass thumb2_ldst_mult<string asm, InstrItinClass itin,
let Inst{21} = 0; // No writeback
let Inst{20} = L_bit;
let Inst{19-16} = Rn;
- let Inst{15-0} = regs;
+ let Inst{15} = 0;
+ let Inst{14-0} = regs{14-0};
}
def IA_UPD :
T2XIt<(outs GPR:$wb), (ins GPR:$Rn, pred:$p, reglist:$regs, variable_ops),
- itin_upd, !strconcat(asm, "ia${p}.w\t$Rn!, $regs"), "$Rn = $wb", []> {
+ itin_upd, !strconcat(asm, "${p}.w\t$Rn!, $regs"), "$Rn = $wb", []> {
bits<4> Rn;
bits<16> regs;
@@ -1537,11 +1554,12 @@ multiclass thumb2_ldst_mult<string asm, InstrItinClass itin,
let Inst{21} = 1; // Writeback
let Inst{20} = L_bit;
let Inst{19-16} = Rn;
- let Inst{15-0} = regs;
+ let Inst{15} = 0;
+ let Inst{14-0} = regs{14-0};
}
def DB :
T2XI<(outs), (ins GPR:$Rn, pred:$p, reglist:$regs, variable_ops),
- itin, !strconcat(asm, "db${p}.w\t$Rn, $regs"), []> {
+ itin, !strconcat(asm, "db${p}\t$Rn, $regs"), []> {
bits<4> Rn;
bits<16> regs;
@@ -1552,11 +1570,12 @@ multiclass thumb2_ldst_mult<string asm, InstrItinClass itin,
let Inst{21} = 0; // No writeback
let Inst{20} = L_bit;
let Inst{19-16} = Rn;
- let Inst{15-0} = regs;
+ let Inst{15} = 0;
+ let Inst{14-0} = regs{14-0};
}
def DB_UPD :
T2XIt<(outs GPR:$wb), (ins GPR:$Rn, pred:$p, reglist:$regs, variable_ops),
- itin_upd, !strconcat(asm, "db${p}.w\t$Rn, $regs"), "$Rn = $wb", []> {
+ itin_upd, !strconcat(asm, "db${p}\t$Rn!, $regs"), "$Rn = $wb", []> {
bits<4> Rn;
bits<16> regs;
@@ -1567,17 +1586,95 @@ multiclass thumb2_ldst_mult<string asm, InstrItinClass itin,
let Inst{21} = 1; // Writeback
let Inst{20} = L_bit;
let Inst{19-16} = Rn;
- let Inst{15-0} = regs;
+ let Inst{15} = 0;
+ let Inst{14-0} = regs{14-0};
}
}
let neverHasSideEffects = 1 in {
let mayLoad = 1, hasExtraDefRegAllocReq = 1 in
-defm t2LDM : thumb2_ldst_mult<"ldm", IIC_iLoad_m, IIC_iLoad_mu, 1>;
+defm t2LDM : thumb2_ld_mult<"ldm", IIC_iLoad_m, IIC_iLoad_mu, 1>;
+
+multiclass thumb2_st_mult<string asm, InstrItinClass itin,
+ InstrItinClass itin_upd, bit L_bit> {
+ def IA :
+ T2XI<(outs), (ins GPR:$Rn, pred:$p, reglist:$regs, variable_ops),
+ itin, !strconcat(asm, "${p}.w\t$Rn, $regs"), []> {
+ bits<4> Rn;
+ bits<16> regs;
+
+ let Inst{31-27} = 0b11101;
+ let Inst{26-25} = 0b00;
+ let Inst{24-23} = 0b01; // Increment After
+ let Inst{22} = 0;
+ let Inst{21} = 0; // No writeback
+ let Inst{20} = L_bit;
+ let Inst{19-16} = Rn;
+ let Inst{15} = 0;
+ let Inst{14} = regs{14};
+ let Inst{13} = 0;
+ let Inst{12-0} = regs{12-0};
+ }
+ def IA_UPD :
+ T2XIt<(outs GPR:$wb), (ins GPR:$Rn, pred:$p, reglist:$regs, variable_ops),
+ itin_upd, !strconcat(asm, "${p}.w\t$Rn!, $regs"), "$Rn = $wb", []> {
+ bits<4> Rn;
+ bits<16> regs;
+
+ let Inst{31-27} = 0b11101;
+ let Inst{26-25} = 0b00;
+ let Inst{24-23} = 0b01; // Increment After
+ let Inst{22} = 0;
+ let Inst{21} = 1; // Writeback
+ let Inst{20} = L_bit;
+ let Inst{19-16} = Rn;
+ let Inst{15} = 0;
+ let Inst{14} = regs{14};
+ let Inst{13} = 0;
+ let Inst{12-0} = regs{12-0};
+ }
+ def DB :
+ T2XI<(outs), (ins GPR:$Rn, pred:$p, reglist:$regs, variable_ops),
+ itin, !strconcat(asm, "db${p}\t$Rn, $regs"), []> {
+ bits<4> Rn;
+ bits<16> regs;
+
+ let Inst{31-27} = 0b11101;
+ let Inst{26-25} = 0b00;
+ let Inst{24-23} = 0b10; // Decrement Before
+ let Inst{22} = 0;
+ let Inst{21} = 0; // No writeback
+ let Inst{20} = L_bit;
+ let Inst{19-16} = Rn;
+ let Inst{15} = 0;
+ let Inst{14} = regs{14};
+ let Inst{13} = 0;
+ let Inst{12-0} = regs{12-0};
+ }
+ def DB_UPD :
+ T2XIt<(outs GPR:$wb), (ins GPR:$Rn, pred:$p, reglist:$regs, variable_ops),
+ itin_upd, !strconcat(asm, "db${p}\t$Rn!, $regs"), "$Rn = $wb", []> {
+ bits<4> Rn;
+ bits<16> regs;
+
+ let Inst{31-27} = 0b11101;
+ let Inst{26-25} = 0b00;
+ let Inst{24-23} = 0b10; // Decrement Before
+ let Inst{22} = 0;
+ let Inst{21} = 1; // Writeback
+ let Inst{20} = L_bit;
+ let Inst{19-16} = Rn;
+ let Inst{15} = 0;
+ let Inst{14} = regs{14};
+ let Inst{13} = 0;
+ let Inst{12-0} = regs{12-0};
+ }
+}
+
let mayStore = 1, hasExtraSrcRegAllocReq = 1 in
-defm t2STM : thumb2_ldst_mult<"stm", IIC_iStore_m, IIC_iStore_mu, 0>;
+defm t2STM : thumb2_st_mult<"stm", IIC_iStore_m, IIC_iStore_mu, 0>;
} // neverHasSideEffects
@@ -1587,7 +1684,7 @@ defm t2STM : thumb2_ldst_mult<"stm", IIC_iStore_m, IIC_iStore_mu, 0>;
//
let neverHasSideEffects = 1 in
-def t2MOVr : T2sTwoReg<(outs GPR:$Rd), (ins GPR:$Rm), IIC_iMOVr,
+def t2MOVr : T2sTwoReg<(outs GPRnopc:$Rd), (ins GPR:$Rm), IIC_iMOVr,
"mov", ".w\t$Rd, $Rm", []> {
let Inst{31-27} = 0b11101;
let Inst{26-25} = 0b01;
@@ -1596,6 +1693,10 @@ def t2MOVr : T2sTwoReg<(outs GPR:$Rd), (ins GPR:$Rm), IIC_iMOVr,
let Inst{14-12} = 0b000;
let Inst{7-4} = 0b0000;
}
+def : t2InstAlias<"movs${p}.w $Rd, $Rm", (t2MOVr GPRnopc:$Rd, GPR:$Rm,
+ pred:$p, CPSR)>;
+def : t2InstAlias<"movs${p} $Rd, $Rm", (t2MOVr GPRnopc:$Rd, GPR:$Rm,
+ pred:$p, CPSR)>;
// AddedComplexity to ensure isel tries t2MOVi before t2MOVi16.
let isReMaterializable = 1, isAsCheapAsAMove = 1, isMoveImm = 1,
@@ -1610,12 +1711,20 @@ def t2MOVi : T2sOneRegImm<(outs rGPR:$Rd), (ins t2_so_imm:$imm), IIC_iMOVi,
let Inst{15} = 0;
}
-def : InstAlias<"mov${s}${p} $Rd, $imm", (t2MOVi rGPR:$Rd, t2_so_imm:$imm,
- pred:$p, cc_out:$s)>,
- Requires<[IsThumb2]>;
+// cc_out is handled as part of the explicit mnemonic in the parser for 'mov'.
+// Use aliases to get that to play nice here.
+def : t2InstAlias<"movs${p}.w $Rd, $imm", (t2MOVi rGPR:$Rd, t2_so_imm:$imm,
+ pred:$p, CPSR)>;
+def : t2InstAlias<"movs${p} $Rd, $imm", (t2MOVi rGPR:$Rd, t2_so_imm:$imm,
+ pred:$p, CPSR)>;
+
+def : t2InstAlias<"mov${p}.w $Rd, $imm", (t2MOVi rGPR:$Rd, t2_so_imm:$imm,
+ pred:$p, zero_reg)>;
+def : t2InstAlias<"mov${p} $Rd, $imm", (t2MOVi rGPR:$Rd, t2_so_imm:$imm,
+ pred:$p, zero_reg)>;
let isReMaterializable = 1, isAsCheapAsAMove = 1, isMoveImm = 1 in
-def t2MOVi16 : T2I<(outs rGPR:$Rd), (ins i32imm_hilo16:$imm), IIC_iMOVi,
+def t2MOVi16 : T2I<(outs rGPR:$Rd), (ins imm0_65535_expr:$imm), IIC_iMOVi,
"movw", "\t$Rd, $imm",
[(set rGPR:$Rd, imm0_65535:$imm)]> {
let Inst{31-27} = 0b11110;
@@ -1632,6 +1741,7 @@ def t2MOVi16 : T2I<(outs rGPR:$Rd), (ins i32imm_hilo16:$imm), IIC_iMOVi,
let Inst{26} = imm{11};
let Inst{14-12} = imm{10-8};
let Inst{7-0} = imm{7-0};
+ let DecoderMethod = "DecodeT2MOVTWInstruction";
}
def t2MOVi16_ga_pcrel : PseudoInst<(outs rGPR:$Rd),
@@ -1639,7 +1749,7 @@ def t2MOVi16_ga_pcrel : PseudoInst<(outs rGPR:$Rd),
let Constraints = "$src = $Rd" in {
def t2MOVTi16 : T2I<(outs rGPR:$Rd),
- (ins rGPR:$src, i32imm_hilo16:$imm), IIC_iMOVi,
+ (ins rGPR:$src, imm0_65535_expr:$imm), IIC_iMOVi,
"movt", "\t$Rd, $imm",
[(set rGPR:$Rd,
(or (and rGPR:$src, 0xffff), lo16AllZero:$imm))]> {
@@ -1657,6 +1767,7 @@ def t2MOVTi16 : T2I<(outs rGPR:$Rd),
let Inst{26} = imm{11};
let Inst{14-12} = imm{10-8};
let Inst{7-0} = imm{7-0};
+ let DecoderMethod = "DecodeT2MOVTWInstruction";
}
def t2MOVTi16_ga_pcrel : PseudoInst<(outs rGPR:$Rd),
@@ -1671,28 +1782,26 @@ def : T2Pat<(or rGPR:$src, 0xffff0000), (t2MOVTi16 rGPR:$src, 0xffff)>;
// Sign extenders
-defm t2SXTB : T2I_ext_rrot<0b100, "sxtb",
+def t2SXTB : T2I_ext_rrot<0b100, "sxtb",
UnOpFrag<(sext_inreg node:$Src, i8)>>;
-defm t2SXTH : T2I_ext_rrot<0b000, "sxth",
+def t2SXTH : T2I_ext_rrot<0b000, "sxth",
UnOpFrag<(sext_inreg node:$Src, i16)>>;
-defm t2SXTB16 : T2I_ext_rrot_sxtb16<0b010, "sxtb16">;
+def t2SXTB16 : T2I_ext_rrot_sxtb16<0b010, "sxtb16">;
-defm t2SXTAB : T2I_exta_rrot<0b100, "sxtab",
+def t2SXTAB : T2I_exta_rrot<0b100, "sxtab",
BinOpFrag<(add node:$LHS, (sext_inreg node:$RHS, i8))>>;
-defm t2SXTAH : T2I_exta_rrot<0b000, "sxtah",
+def t2SXTAH : T2I_exta_rrot<0b000, "sxtah",
BinOpFrag<(add node:$LHS, (sext_inreg node:$RHS,i16))>>;
-defm t2SXTAB16 : T2I_exta_rrot_DO<0b010, "sxtab16">;
-
-// TODO: SXT(A){B|H}16 - done for disassembly only
+def t2SXTAB16 : T2I_exta_rrot_np<0b010, "sxtab16">;
// Zero extenders
let AddedComplexity = 16 in {
-defm t2UXTB : T2I_ext_rrot<0b101, "uxtb",
+def t2UXTB : T2I_ext_rrot<0b101, "uxtb",
UnOpFrag<(and node:$Src, 0x000000FF)>>;
-defm t2UXTH : T2I_ext_rrot<0b001, "uxth",
+def t2UXTH : T2I_ext_rrot<0b001, "uxth",
UnOpFrag<(and node:$Src, 0x0000FFFF)>>;
-defm t2UXTB16 : T2I_ext_rrot_uxtb16<0b011, "uxtb16",
+def t2UXTB16 : T2I_ext_rrot_uxtb16<0b011, "uxtb16",
UnOpFrag<(and node:$Src, 0x00FF00FF)>>;
// FIXME: This pattern incorrectly assumes the shl operator is a rotate.
@@ -1700,17 +1809,17 @@ defm t2UXTB16 : T2I_ext_rrot_uxtb16<0b011, "uxtb16",
// instead so we can include a check for masking back in the upper
// eight bits of the source into the lower eight bits of the result.
//def : T2Pat<(and (shl rGPR:$Src, (i32 8)), 0xFF00FF),
-// (t2UXTB16r_rot rGPR:$Src, 24)>,
+// (t2UXTB16 rGPR:$Src, 3)>,
// Requires<[HasT2ExtractPack, IsThumb2]>;
def : T2Pat<(and (srl rGPR:$Src, (i32 8)), 0xFF00FF),
- (t2UXTB16r_rot rGPR:$Src, 8)>,
+ (t2UXTB16 rGPR:$Src, 1)>,
Requires<[HasT2ExtractPack, IsThumb2]>;
-defm t2UXTAB : T2I_exta_rrot<0b101, "uxtab",
+def t2UXTAB : T2I_exta_rrot<0b101, "uxtab",
BinOpFrag<(add node:$LHS, (and node:$RHS, 0x00FF))>>;
-defm t2UXTAH : T2I_exta_rrot<0b001, "uxtah",
+def t2UXTAH : T2I_exta_rrot<0b001, "uxtah",
BinOpFrag<(add node:$LHS, (and node:$RHS, 0xFFFF))>>;
-defm t2UXTAB16 : T2I_exta_rrot_DO<0b011, "uxtab16">;
+def t2UXTAB16 : T2I_exta_rrot_np<0b011, "uxtab16">;
}
//===----------------------------------------------------------------------===//
@@ -1723,27 +1832,37 @@ defm t2SUB : T2I_bin_ii12rs<0b101, "sub",
BinOpFrag<(sub node:$LHS, node:$RHS)>>;
// ADD and SUB with 's' bit set. No 12-bit immediate (T4) variants.
+//
+// Currently, t2ADDS/t2SUBS are pseudo opcodes that exist only in the
+// selection DAG. They are "lowered" to real t2ADD/t2SUB opcodes by
+// AdjustInstrPostInstrSelection where we determine whether or not to
+// set the "s" bit based on CPSR liveness.
+//
+// FIXME: Eliminate t2ADDS/t2SUBS pseudo opcodes after adding tablegen
+// support for an optional CPSR definition that corresponds to the DAG
+// node's second value. We can then eliminate the implicit def of CPSR.
defm t2ADDS : T2I_bin_s_irs <0b1000, "add",
IIC_iALUi, IIC_iALUr, IIC_iALUsi,
- BinOpFrag<(addc node:$LHS, node:$RHS)>, 1>;
+ BinOpFrag<(ARMaddc node:$LHS, node:$RHS)>, 1>;
defm t2SUBS : T2I_bin_s_irs <0b1101, "sub",
IIC_iALUi, IIC_iALUr, IIC_iALUsi,
- BinOpFrag<(subc node:$LHS, node:$RHS)>>;
+ BinOpFrag<(ARMsubc node:$LHS, node:$RHS)>>;
+let hasPostISelHook = 1 in {
defm t2ADC : T2I_adde_sube_irs<0b1010, "adc",
- BinOpFrag<(adde_dead_carry node:$LHS, node:$RHS)>, 1>;
+ BinOpWithFlagFrag<(ARMadde node:$LHS, node:$RHS, node:$FLAG)>, 1>;
defm t2SBC : T2I_adde_sube_irs<0b1011, "sbc",
- BinOpFrag<(sube_dead_carry node:$LHS, node:$RHS)>>;
-defm t2ADCS : T2I_adde_sube_s_irs<BinOpFrag<(adde_live_carry node:$LHS,
- node:$RHS)>, 1>;
-defm t2SBCS : T2I_adde_sube_s_irs<BinOpFrag<(sube_live_carry node:$LHS,
- node:$RHS)>>;
+ BinOpWithFlagFrag<(ARMsube node:$LHS, node:$RHS, node:$FLAG)>>;
+}
// RSB
defm t2RSB : T2I_rbin_irs <0b1110, "rsb",
BinOpFrag<(sub node:$LHS, node:$RHS)>>;
+
+// FIXME: Eliminate them if we can write def : Pat patterns which defines
+// CPSR and the implicit def of CPSR is not needed.
defm t2RSBS : T2I_rbin_s_is <0b1110, "rsb",
- BinOpFrag<(subc node:$LHS, node:$RHS)>>;
+ BinOpFrag<(ARMsubc node:$LHS, node:$RHS)>>;
// (sub X, imm) gets canonicalized to (add X, -imm). Match this form.
// The assume-no-carry-in form uses the negation of the input since add/sub
@@ -1760,23 +1879,18 @@ def : T2Pat<(add GPR:$src, t2_so_imm_neg:$imm),
def : T2Pat<(add GPR:$src, imm0_4095_neg:$imm),
(t2SUBri12 GPR:$src, imm0_4095_neg:$imm)>;
let AddedComplexity = 1 in
-def : T2Pat<(addc rGPR:$src, imm0_255_neg:$imm),
+def : T2Pat<(ARMaddc rGPR:$src, imm0_255_neg:$imm),
(t2SUBSri rGPR:$src, imm0_255_neg:$imm)>;
-def : T2Pat<(addc rGPR:$src, t2_so_imm_neg:$imm),
+def : T2Pat<(ARMaddc rGPR:$src, t2_so_imm_neg:$imm),
(t2SUBSri rGPR:$src, t2_so_imm_neg:$imm)>;
// The with-carry-in form matches bitwise not instead of the negation.
// Effectively, the inverse interpretation of the carry flag already accounts
// for part of the negation.
let AddedComplexity = 1 in
-def : T2Pat<(adde_dead_carry rGPR:$src, imm0_255_not:$imm),
+def : T2Pat<(ARMadde rGPR:$src, imm0_255_not:$imm, CPSR),
(t2SBCri rGPR:$src, imm0_255_not:$imm)>;
-def : T2Pat<(adde_dead_carry rGPR:$src, t2_so_imm_not:$imm),
+def : T2Pat<(ARMadde rGPR:$src, t2_so_imm_not:$imm, CPSR),
(t2SBCri rGPR:$src, t2_so_imm_not:$imm)>;
-let AddedComplexity = 1 in
-def : T2Pat<(adde_live_carry rGPR:$src, imm0_255_not:$imm),
- (t2SBCSri rGPR:$src, imm0_255_not:$imm)>;
-def : T2Pat<(adde_live_carry rGPR:$src, t2_so_imm_not:$imm),
- (t2SBCSri rGPR:$src, t2_so_imm_not:$imm)>;
// Select Bytes -- for disassembly only
@@ -1893,8 +2007,7 @@ class T2FourReg_mac<bit long, bits<3> op22_20, bits<4> op7_4, dag oops,
let Inst{7-4} = op7_4;
}
-// Unsigned Sum of Absolute Differences [and Accumulate] -- for disassembly only
-
+// Unsigned Sum of Absolute Differences [and Accumulate].
def t2USAD8 : T2ThreeReg_mac<0, 0b111, 0b0000, (outs rGPR:$Rd),
(ins rGPR:$Rn, rGPR:$Rm),
NoItinerary, "usad8", "\t$Rd, $Rn, $Rm", []>,
@@ -1906,8 +2019,7 @@ def t2USADA8 : T2FourReg_mac<0, 0b111, 0b0000, (outs rGPR:$Rd),
"usada8", "\t$Rd, $Rn, $Rm, $Ra", []>,
Requires<[IsThumb2, HasThumb2DSP]>;
-// Signed/Unsigned saturate -- for disassembly only
-
+// Signed/Unsigned saturate.
class T2SatI<dag oops, dag iops, InstrItinClass itin,
string opc, string asm, list<dag> pattern>
: T2I<oops, iops, itin, opc, asm, pattern> {
@@ -1918,26 +2030,26 @@ class T2SatI<dag oops, dag iops, InstrItinClass itin,
let Inst{11-8} = Rd;
let Inst{19-16} = Rn;
- let Inst{4-0} = sat_imm{4-0};
- let Inst{21} = sh{6};
+ let Inst{4-0} = sat_imm;
+ let Inst{21} = sh{5};
let Inst{14-12} = sh{4-2};
let Inst{7-6} = sh{1-0};
}
def t2SSAT: T2SatI<
- (outs rGPR:$Rd), (ins ssat_imm:$sat_imm, rGPR:$Rn, shift_imm:$sh),
- NoItinerary, "ssat", "\t$Rd, $sat_imm, $Rn$sh",
- [/* For disassembly only; pattern left blank */]> {
+ (outs rGPR:$Rd),
+ (ins imm1_32:$sat_imm, rGPR:$Rn, t2_shift_imm:$sh),
+ NoItinerary, "ssat", "\t$Rd, $sat_imm, $Rn$sh", []> {
let Inst{31-27} = 0b11110;
let Inst{25-22} = 0b1100;
let Inst{20} = 0;
let Inst{15} = 0;
+ let Inst{5} = 0;
}
def t2SSAT16: T2SatI<
- (outs rGPR:$Rd), (ins ssat_imm:$sat_imm, rGPR:$Rn), NoItinerary,
- "ssat16", "\t$Rd, $sat_imm, $Rn",
- [/* For disassembly only; pattern left blank */]>,
+ (outs rGPR:$Rd), (ins imm1_16:$sat_imm, rGPR:$Rn), NoItinerary,
+ "ssat16", "\t$Rd, $sat_imm, $Rn", []>,
Requires<[IsThumb2, HasThumb2DSP]> {
let Inst{31-27} = 0b11110;
let Inst{25-22} = 0b1100;
@@ -1946,30 +2058,30 @@ def t2SSAT16: T2SatI<
let Inst{21} = 1; // sh = '1'
let Inst{14-12} = 0b000; // imm3 = '000'
let Inst{7-6} = 0b00; // imm2 = '00'
+ let Inst{5-4} = 0b00;
}
def t2USAT: T2SatI<
- (outs rGPR:$Rd), (ins i32imm:$sat_imm, rGPR:$Rn, shift_imm:$sh),
- NoItinerary, "usat", "\t$Rd, $sat_imm, $Rn$sh",
- [/* For disassembly only; pattern left blank */]> {
+ (outs rGPR:$Rd),
+ (ins imm0_31:$sat_imm, rGPR:$Rn, t2_shift_imm:$sh),
+ NoItinerary, "usat", "\t$Rd, $sat_imm, $Rn$sh", []> {
let Inst{31-27} = 0b11110;
let Inst{25-22} = 0b1110;
let Inst{20} = 0;
let Inst{15} = 0;
}
-def t2USAT16: T2SatI<(outs rGPR:$dst), (ins i32imm:$sat_imm, rGPR:$Rn),
+def t2USAT16: T2SatI<(outs rGPR:$Rd), (ins imm0_15:$sat_imm, rGPR:$Rn),
NoItinerary,
- "usat16", "\t$dst, $sat_imm, $Rn",
- [/* For disassembly only; pattern left blank */]>,
+ "usat16", "\t$Rd, $sat_imm, $Rn", []>,
Requires<[IsThumb2, HasThumb2DSP]> {
- let Inst{31-27} = 0b11110;
- let Inst{25-22} = 0b1110;
+ let Inst{31-22} = 0b1111001110;
let Inst{20} = 0;
let Inst{15} = 0;
let Inst{21} = 1; // sh = '1'
let Inst{14-12} = 0b000; // imm3 = '000'
let Inst{7-6} = 0b00; // imm2 = '00'
+ let Inst{5-4} = 0b00;
}
def : T2Pat<(int_arm_ssat GPR:$a, imm:$pos), (t2SSAT imm:$pos, GPR:$a, 0)>;
@@ -1979,10 +2091,14 @@ def : T2Pat<(int_arm_usat GPR:$a, imm:$pos), (t2USAT imm:$pos, GPR:$a, 0)>;
// Shift and rotate Instructions.
//
-defm t2LSL : T2I_sh_ir<0b00, "lsl", BinOpFrag<(shl node:$LHS, node:$RHS)>>;
-defm t2LSR : T2I_sh_ir<0b01, "lsr", BinOpFrag<(srl node:$LHS, node:$RHS)>>;
-defm t2ASR : T2I_sh_ir<0b10, "asr", BinOpFrag<(sra node:$LHS, node:$RHS)>>;
-defm t2ROR : T2I_sh_ir<0b11, "ror", BinOpFrag<(rotr node:$LHS, node:$RHS)>>;
+defm t2LSL : T2I_sh_ir<0b00, "lsl", imm0_31,
+ BinOpFrag<(shl node:$LHS, node:$RHS)>, "t2LSL">;
+defm t2LSR : T2I_sh_ir<0b01, "lsr", imm_sr,
+ BinOpFrag<(srl node:$LHS, node:$RHS)>, "t2LSR">;
+defm t2ASR : T2I_sh_ir<0b10, "asr", imm_sr,
+ BinOpFrag<(sra node:$LHS, node:$RHS)>, "t2ASR">;
+defm t2ROR : T2I_sh_ir<0b11, "ror", imm0_31,
+ BinOpFrag<(rotr node:$LHS, node:$RHS)>, "t2ROR">;
// (rotr x, (and y, 0x...1f)) ==> (ROR x, y)
def : Pat<(rotr rGPR:$lhs, (and rGPR:$rhs, lo5AllOne)),
@@ -2090,7 +2206,7 @@ def t2BFC : T2BitFI<(outs rGPR:$Rd), (ins rGPR:$src, bf_inv_mask_imm:$imm),
}
def t2SBFX: T2TwoRegBitFI<
- (outs rGPR:$Rd), (ins rGPR:$Rn, imm0_31:$lsb, imm0_31_m1:$msb),
+ (outs rGPR:$Rd), (ins rGPR:$Rn, imm0_31:$lsb, imm1_32:$msb),
IIC_iUNAsi, "sbfx", "\t$Rd, $Rn, $lsb, $msb", []> {
let Inst{31-27} = 0b11110;
let Inst{25} = 1;
@@ -2099,7 +2215,7 @@ def t2SBFX: T2TwoRegBitFI<
}
def t2UBFX: T2TwoRegBitFI<
- (outs rGPR:$Rd), (ins rGPR:$Rn, imm0_31:$lsb, imm0_31_m1:$msb),
+ (outs rGPR:$Rd), (ins rGPR:$Rn, imm0_31:$lsb, imm1_32:$msb),
IIC_iUNAsi, "ubfx", "\t$Rd, $Rn, $lsb, $msb", []> {
let Inst{31-27} = 0b11110;
let Inst{25} = 1;
@@ -2125,26 +2241,6 @@ let Constraints = "$src = $Rd" in {
let msb{4-0} = imm{9-5};
let lsb{4-0} = imm{4-0};
}
-
- // GNU as only supports this form of bfi (w/ 4 arguments)
- let isAsmParserOnly = 1 in
- def t2BFI4p : T2TwoRegBitFI<(outs rGPR:$Rd),
- (ins rGPR:$src, rGPR:$Rn, lsb_pos_imm:$lsbit,
- width_imm:$width),
- IIC_iBITi, "bfi", "\t$Rd, $Rn, $lsbit, $width",
- []> {
- let Inst{31-27} = 0b11110;
- let Inst{26} = 0; // should be 0.
- let Inst{25} = 1;
- let Inst{24-20} = 0b10110;
- let Inst{15} = 0;
- let Inst{5} = 0; // should be 0.
-
- bits<5> lsbit;
- bits<5> width;
- let msb{4-0} = width; // Custom encoder => lsb+width-1
- let lsb{4-0} = lsbit;
- }
}
defm t2ORN : T2I_bin_irs<0b0011, "orn",
@@ -2152,13 +2248,53 @@ defm t2ORN : T2I_bin_irs<0b0011, "orn",
BinOpFrag<(or node:$LHS, (not node:$RHS))>,
"t2ORN", 0, "">;
+/// T2I_un_irs - Defines a set of (op reg, {so_imm|r|so_reg}) patterns for a
+/// unary operation that produces a value. These are predicable and can be
+/// changed to modify CPSR.
+multiclass T2I_un_irs<bits<4> opcod, string opc,
+ InstrItinClass iii, InstrItinClass iir, InstrItinClass iis,
+ PatFrag opnode, bit Cheap = 0, bit ReMat = 0> {
+ // shifted imm
+ def i : T2sOneRegImm<(outs rGPR:$Rd), (ins t2_so_imm:$imm), iii,
+ opc, "\t$Rd, $imm",
+ [(set rGPR:$Rd, (opnode t2_so_imm:$imm))]> {
+ let isAsCheapAsAMove = Cheap;
+ let isReMaterializable = ReMat;
+ let Inst{31-27} = 0b11110;
+ let Inst{25} = 0;
+ let Inst{24-21} = opcod;
+ let Inst{19-16} = 0b1111; // Rn
+ let Inst{15} = 0;
+ }
+ // register
+ def r : T2sTwoReg<(outs rGPR:$Rd), (ins rGPR:$Rm), iir,
+ opc, ".w\t$Rd, $Rm",
+ [(set rGPR:$Rd, (opnode rGPR:$Rm))]> {
+ let Inst{31-27} = 0b11101;
+ let Inst{26-25} = 0b01;
+ let Inst{24-21} = opcod;
+ let Inst{19-16} = 0b1111; // Rn
+ let Inst{14-12} = 0b000; // imm3
+ let Inst{7-6} = 0b00; // imm2
+ let Inst{5-4} = 0b00; // type
+ }
+ // shifted register
+ def s : T2sOneRegShiftedReg<(outs rGPR:$Rd), (ins t2_so_reg:$ShiftedRm), iis,
+ opc, ".w\t$Rd, $ShiftedRm",
+ [(set rGPR:$Rd, (opnode t2_so_reg:$ShiftedRm))]> {
+ let Inst{31-27} = 0b11101;
+ let Inst{26-25} = 0b01;
+ let Inst{24-21} = opcod;
+ let Inst{19-16} = 0b1111; // Rn
+ }
+}
+
// Prefer over of t2EORri ra, rb, -1 because mvn has 16-bit version
let AddedComplexity = 1 in
defm t2MVN : T2I_un_irs <0b0011, "mvn",
IIC_iMVNi, IIC_iMVNr, IIC_iMVNsi,
UnOpFrag<(not node:$Src)>, 1, 1>;
-
let AddedComplexity = 1 in
def : T2Pat<(and rGPR:$src, t2_so_imm_not:$imm),
(t2BICri rGPR:$src, t2_so_imm_not:$imm)>;
@@ -2209,9 +2345,9 @@ def t2MLS: T2FourReg<
let neverHasSideEffects = 1 in {
let isCommutable = 1 in {
def t2SMULL : T2MulLong<0b000, 0b0000,
- (outs rGPR:$Rd, rGPR:$Ra),
+ (outs rGPR:$RdLo, rGPR:$RdHi),
(ins rGPR:$Rn, rGPR:$Rm), IIC_iMUL64,
- "smull", "\t$Rd, $Ra, $Rn, $Rm", []>;
+ "smull", "\t$RdLo, $RdHi, $Rn, $Rm", []>;
def t2UMULL : T2MulLong<0b010, 0b0000,
(outs rGPR:$RdLo, rGPR:$RdHi),
@@ -2468,7 +2604,7 @@ multiclass T2I_smla<string opc, PatFrag opnode> {
defm t2SMUL : T2I_smul<"smul", BinOpFrag<(mul node:$LHS, node:$RHS)>>;
defm t2SMLA : T2I_smla<"smla", BinOpFrag<(mul node:$LHS, node:$RHS)>>;
-// Halfword multiple accumulate long: SMLAL<x><y> -- for disassembly only
+// Halfword multiple accumulate long: SMLAL<x><y>
def t2SMLALBB : T2FourReg_mac<1, 0b100, 0b1000, (outs rGPR:$Ra,rGPR:$Rd),
(ins rGPR:$Rn,rGPR:$Rm), IIC_iMAC64, "smlalbb", "\t$Ra, $Rd, $Rn, $Rm",
[/* For disassembly only; pattern left blank */]>,
@@ -2487,8 +2623,6 @@ def t2SMLALTT : T2FourReg_mac<1, 0b100, 0b1011, (outs rGPR:$Ra,rGPR:$Rd),
Requires<[IsThumb2, HasThumb2DSP]>;
// Dual halfword multiple: SMUAD, SMUSD, SMLAD, SMLSD, SMLALD, SMLSLD
-// These are for disassembly only.
-
def t2SMUAD: T2ThreeReg_mac<
0, 0b010, 0b0000, (outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm),
IIC_iMAC32, "smuad", "\t$Rd, $Rn, $Rm", []>,
@@ -2513,7 +2647,7 @@ def t2SMUSDX:T2ThreeReg_mac<
Requires<[IsThumb2, HasThumb2DSP]> {
let Inst{15-12} = 0b1111;
}
-def t2SMLAD : T2ThreeReg_mac<
+def t2SMLAD : T2FourReg_mac<
0, 0b010, 0b0000, (outs rGPR:$Rd),
(ins rGPR:$Rn, rGPR:$Rm, rGPR:$Ra), IIC_iMAC32, "smlad",
"\t$Rd, $Rn, $Rm, $Ra", []>,
@@ -2532,20 +2666,20 @@ def t2SMLSDX : T2FourReg_mac<0, 0b100, 0b0001, (outs rGPR:$Rd),
"\t$Rd, $Rn, $Rm, $Ra", []>,
Requires<[IsThumb2, HasThumb2DSP]>;
def t2SMLALD : T2FourReg_mac<1, 0b100, 0b1100, (outs rGPR:$Ra,rGPR:$Rd),
- (ins rGPR:$Rm, rGPR:$Rn), IIC_iMAC64, "smlald",
- "\t$Ra, $Rd, $Rm, $Rn", []>,
+ (ins rGPR:$Rn, rGPR:$Rm), IIC_iMAC64, "smlald",
+ "\t$Ra, $Rd, $Rn, $Rm", []>,
Requires<[IsThumb2, HasThumb2DSP]>;
def t2SMLALDX : T2FourReg_mac<1, 0b100, 0b1101, (outs rGPR:$Ra,rGPR:$Rd),
- (ins rGPR:$Rm,rGPR:$Rn), IIC_iMAC64, "smlaldx",
- "\t$Ra, $Rd, $Rm, $Rn", []>,
+ (ins rGPR:$Rn,rGPR:$Rm), IIC_iMAC64, "smlaldx",
+ "\t$Ra, $Rd, $Rn, $Rm", []>,
Requires<[IsThumb2, HasThumb2DSP]>;
def t2SMLSLD : T2FourReg_mac<1, 0b101, 0b1100, (outs rGPR:$Ra,rGPR:$Rd),
- (ins rGPR:$Rm,rGPR:$Rn), IIC_iMAC64, "smlsld",
- "\t$Ra, $Rd, $Rm, $Rn", []>,
+ (ins rGPR:$Rn,rGPR:$Rm), IIC_iMAC64, "smlsld",
+ "\t$Ra, $Rd, $Rn, $Rm", []>,
Requires<[IsThumb2, HasThumb2DSP]>;
def t2SMLSLDX : T2FourReg_mac<1, 0b101, 0b1101, (outs rGPR:$Ra,rGPR:$Rd),
(ins rGPR:$Rm,rGPR:$Rn), IIC_iMAC64, "smlsldx",
- "\t$Ra, $Rd, $Rm, $Rn", []>,
+ "\t$Ra, $Rd, $Rn, $Rm", []>,
Requires<[IsThumb2, HasThumb2DSP]>;
//===----------------------------------------------------------------------===//
@@ -2613,10 +2747,10 @@ def : T2Pat<(or (sra (shl rGPR:$Rm, (i32 24)), (i32 16)),
(t2REVSH rGPR:$Rm)>;
def t2PKHBT : T2ThreeReg<
- (outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm, shift_imm:$sh),
+ (outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm, pkh_lsl_amt:$sh),
IIC_iBITsi, "pkhbt", "\t$Rd, $Rn, $Rm$sh",
[(set rGPR:$Rd, (or (and rGPR:$Rn, 0xFFFF),
- (and (shl rGPR:$Rm, lsl_amt:$sh),
+ (and (shl rGPR:$Rm, pkh_lsl_amt:$sh),
0xFFFF0000)))]>,
Requires<[HasT2ExtractPack, IsThumb2]> {
let Inst{31-27} = 0b11101;
@@ -2625,9 +2759,9 @@ def t2PKHBT : T2ThreeReg<
let Inst{5} = 0; // BT form
let Inst{4} = 0;
- bits<8> sh;
- let Inst{14-12} = sh{7-5};
- let Inst{7-6} = sh{4-3};
+ bits<5> sh;
+ let Inst{14-12} = sh{4-2};
+ let Inst{7-6} = sh{1-0};
}
// Alternate cases for PKHBT where identities eliminate some nodes.
@@ -2635,16 +2769,16 @@ def : T2Pat<(or (and rGPR:$src1, 0xFFFF), (and rGPR:$src2, 0xFFFF0000)),
(t2PKHBT rGPR:$src1, rGPR:$src2, 0)>,
Requires<[HasT2ExtractPack, IsThumb2]>;
def : T2Pat<(or (and rGPR:$src1, 0xFFFF), (shl rGPR:$src2, imm16_31:$sh)),
- (t2PKHBT rGPR:$src1, rGPR:$src2, (lsl_shift_imm imm16_31:$sh))>,
+ (t2PKHBT rGPR:$src1, rGPR:$src2, imm16_31:$sh)>,
Requires<[HasT2ExtractPack, IsThumb2]>;
// Note: Shifts of 1-15 bits will be transformed to srl instead of sra and
// will match the pattern below.
def t2PKHTB : T2ThreeReg<
- (outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm, shift_imm:$sh),
+ (outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm, pkh_asr_amt:$sh),
IIC_iBITsi, "pkhtb", "\t$Rd, $Rn, $Rm$sh",
[(set rGPR:$Rd, (or (and rGPR:$Rn, 0xFFFF0000),
- (and (sra rGPR:$Rm, asr_amt:$sh),
+ (and (sra rGPR:$Rm, pkh_asr_amt:$sh),
0xFFFF)))]>,
Requires<[HasT2ExtractPack, IsThumb2]> {
let Inst{31-27} = 0b11101;
@@ -2653,19 +2787,19 @@ def t2PKHTB : T2ThreeReg<
let Inst{5} = 1; // TB form
let Inst{4} = 0;
- bits<8> sh;
- let Inst{14-12} = sh{7-5};
- let Inst{7-6} = sh{4-3};
+ bits<5> sh;
+ let Inst{14-12} = sh{4-2};
+ let Inst{7-6} = sh{1-0};
}
// Alternate cases for PKHTB where identities eliminate some nodes. Note that
// a shift amount of 0 is *not legal* here, it is PKHBT instead.
def : T2Pat<(or (and rGPR:$src1, 0xFFFF0000), (srl rGPR:$src2, imm16_31:$sh)),
- (t2PKHTB rGPR:$src1, rGPR:$src2, (asr_shift_imm imm16_31:$sh))>,
+ (t2PKHTB rGPR:$src1, rGPR:$src2, imm16_31:$sh)>,
Requires<[HasT2ExtractPack, IsThumb2]>;
def : T2Pat<(or (and rGPR:$src1, 0xFFFF0000),
(and (srl rGPR:$src2, imm1_15:$sh), 0xFFFF)),
- (t2PKHTB rGPR:$src1, rGPR:$src2, (asr_shift_imm imm1_15:$sh))>,
+ (t2PKHTB rGPR:$src1, rGPR:$src2, imm1_15:$sh)>,
Requires<[HasT2ExtractPack, IsThumb2]>;
//===----------------------------------------------------------------------===//
@@ -2673,14 +2807,14 @@ def : T2Pat<(or (and rGPR:$src1, 0xFFFF0000),
//
defm t2CMP : T2I_cmp_irs<0b1101, "cmp",
IIC_iCMPi, IIC_iCMPr, IIC_iCMPsi,
- BinOpFrag<(ARMcmp node:$LHS, node:$RHS)>>;
+ BinOpFrag<(ARMcmp node:$LHS, node:$RHS)>, "t2CMP">;
-def : T2Pat<(ARMcmpZ GPR:$lhs, t2_so_imm:$imm),
- (t2CMPri GPR:$lhs, t2_so_imm:$imm)>;
-def : T2Pat<(ARMcmpZ GPR:$lhs, rGPR:$rhs),
- (t2CMPrr GPR:$lhs, rGPR:$rhs)>;
-def : T2Pat<(ARMcmpZ GPR:$lhs, t2_so_reg:$rhs),
- (t2CMPrs GPR:$lhs, t2_so_reg:$rhs)>;
+def : T2Pat<(ARMcmpZ GPRnopc:$lhs, t2_so_imm:$imm),
+ (t2CMPri GPRnopc:$lhs, t2_so_imm:$imm)>;
+def : T2Pat<(ARMcmpZ GPRnopc:$lhs, rGPR:$rhs),
+ (t2CMPrr GPRnopc:$lhs, rGPR:$rhs)>;
+def : T2Pat<(ARMcmpZ GPRnopc:$lhs, t2_so_reg:$rhs),
+ (t2CMPrs GPRnopc:$lhs, t2_so_reg:$rhs)>;
//FIXME: Disable CMN, as CCodes are backwards from compare expectations
// Compare-to-zero still works out, just not the relationals
@@ -2688,20 +2822,23 @@ def : T2Pat<(ARMcmpZ GPR:$lhs, t2_so_reg:$rhs),
// BinOpFrag<(ARMcmp node:$LHS,(ineg node:$RHS))>>;
defm t2CMNz : T2I_cmp_irs<0b1000, "cmn",
IIC_iCMPi, IIC_iCMPr, IIC_iCMPsi,
- BinOpFrag<(ARMcmpZ node:$LHS,(ineg node:$RHS))>>;
+ BinOpFrag<(ARMcmpZ node:$LHS,(ineg node:$RHS))>,
+ "t2CMNz">;
//def : T2Pat<(ARMcmp GPR:$src, t2_so_imm_neg:$imm),
// (t2CMNri GPR:$src, t2_so_imm_neg:$imm)>;
-def : T2Pat<(ARMcmpZ GPR:$src, t2_so_imm_neg:$imm),
- (t2CMNzri GPR:$src, t2_so_imm_neg:$imm)>;
+def : T2Pat<(ARMcmpZ GPRnopc:$src, t2_so_imm_neg:$imm),
+ (t2CMNzri GPRnopc:$src, t2_so_imm_neg:$imm)>;
defm t2TST : T2I_cmp_irs<0b0000, "tst",
IIC_iTSTi, IIC_iTSTr, IIC_iTSTsi,
- BinOpFrag<(ARMcmpZ (and_su node:$LHS, node:$RHS), 0)>>;
+ BinOpFrag<(ARMcmpZ (and_su node:$LHS, node:$RHS), 0)>,
+ "t2TST">;
defm t2TEQ : T2I_cmp_irs<0b0100, "teq",
IIC_iTSTi, IIC_iTSTr, IIC_iTSTsi,
- BinOpFrag<(ARMcmpZ (xor_su node:$LHS, node:$RHS), 0)>>;
+ BinOpFrag<(ARMcmpZ (xor_su node:$LHS, node:$RHS), 0)>,
+ "t2TEQ">;
// Conditional moves
// FIXME: should be able to write a pattern for ARMcmov, but can't use
@@ -2723,7 +2860,7 @@ def t2MOVCCi : t2PseudoInst<(outs rGPR:$Rd),
// FIXME: Pseudo-ize these. For now, just mark codegen only.
let isCodeGenOnly = 1 in {
let isMoveImm = 1 in
-def t2MOVCCi16 : T2I<(outs rGPR:$Rd), (ins rGPR:$false, i32imm_hilo16:$imm),
+def t2MOVCCi16 : T2I<(outs rGPR:$Rd), (ins rGPR:$false, imm0_65535_expr:$imm),
IIC_iCMOVi,
"movw", "\t$Rd, $imm", []>,
RegConstraint<"$false = $Rd"> {
@@ -2807,20 +2944,19 @@ def t2DMB : AInoP<(outs), (ins memb_opt:$opt), ThumbFrm, NoItinerary,
}
def t2DSB : AInoP<(outs), (ins memb_opt:$opt), ThumbFrm, NoItinerary,
- "dsb", "\t$opt",
- [/* For disassembly only; pattern left blank */]>,
+ "dsb", "\t$opt", []>,
Requires<[IsThumb, HasDB]> {
bits<4> opt;
let Inst{31-4} = 0xf3bf8f4;
let Inst{3-0} = opt;
}
-// ISB has only full system option -- for disassembly only
-def t2ISB : AInoP<(outs), (ins), ThumbFrm, NoItinerary, "isb", "",
- [/* For disassembly only; pattern left blank */]>,
- Requires<[IsThumb2, HasV7]> {
+def t2ISB : AInoP<(outs), (ins memb_opt:$opt), ThumbFrm, NoItinerary,
+ "isb", "\t$opt",
+ []>, Requires<[IsThumb2, HasDB]> {
+ bits<4> opt;
let Inst{31-4} = 0xf3bf8f6;
- let Inst{3-0} = 0b1111;
+ let Inst{3-0} = opt;
}
class T2I_ldrex<bits<2> opcod, dag oops, dag iops, AddrMode am, int sz,
@@ -2858,28 +2994,27 @@ class T2I_strex<bits<2> opcod, dag oops, dag iops, AddrMode am, int sz,
}
let mayLoad = 1 in {
-def t2LDREXB : T2I_ldrex<0b00, (outs rGPR:$Rt), (ins t2addrmode_reg:$addr),
+def t2LDREXB : T2I_ldrex<0b00, (outs rGPR:$Rt), (ins addr_offset_none:$addr),
AddrModeNone, 4, NoItinerary,
"ldrexb", "\t$Rt, $addr", "", []>;
-def t2LDREXH : T2I_ldrex<0b01, (outs rGPR:$Rt), (ins t2addrmode_reg:$addr),
+def t2LDREXH : T2I_ldrex<0b01, (outs rGPR:$Rt), (ins addr_offset_none:$addr),
AddrModeNone, 4, NoItinerary,
"ldrexh", "\t$Rt, $addr", "", []>;
-def t2LDREX : Thumb2I<(outs rGPR:$Rt), (ins t2addrmode_reg:$addr),
+def t2LDREX : Thumb2I<(outs rGPR:$Rt), (ins t2addrmode_imm0_1020s4:$addr),
AddrModeNone, 4, NoItinerary,
"ldrex", "\t$Rt, $addr", "", []> {
+ bits<4> Rt;
+ bits<12> addr;
let Inst{31-27} = 0b11101;
let Inst{26-20} = 0b0000101;
- let Inst{11-8} = 0b1111;
- let Inst{7-0} = 0b00000000; // imm8 = 0
-
- bits<4> Rt;
- bits<4> addr;
- let Inst{19-16} = addr;
+ let Inst{19-16} = addr{11-8};
let Inst{15-12} = Rt;
+ let Inst{11-8} = 0b1111;
+ let Inst{7-0} = addr{7-0};
}
let hasExtraDefRegAllocReq = 1 in
def t2LDREXD : T2I_ldrex<0b11, (outs rGPR:$Rt, rGPR:$Rt2),
- (ins t2addrmode_reg:$addr),
+ (ins addr_offset_none:$addr),
AddrModeNone, 4, NoItinerary,
"ldrexd", "\t$Rt, $Rt2, $addr", "",
[], {?, ?, ?, ?}> {
@@ -2890,33 +3025,33 @@ def t2LDREXD : T2I_ldrex<0b11, (outs rGPR:$Rt, rGPR:$Rt2),
let mayStore = 1, Constraints = "@earlyclobber $Rd" in {
def t2STREXB : T2I_strex<0b00, (outs rGPR:$Rd),
- (ins rGPR:$Rt, t2addrmode_reg:$addr),
+ (ins rGPR:$Rt, addr_offset_none:$addr),
AddrModeNone, 4, NoItinerary,
"strexb", "\t$Rd, $Rt, $addr", "", []>;
def t2STREXH : T2I_strex<0b01, (outs rGPR:$Rd),
- (ins rGPR:$Rt, t2addrmode_reg:$addr),
+ (ins rGPR:$Rt, addr_offset_none:$addr),
AddrModeNone, 4, NoItinerary,
"strexh", "\t$Rd, $Rt, $addr", "", []>;
-def t2STREX : Thumb2I<(outs rGPR:$Rd), (ins rGPR:$Rt, t2addrmode_reg:$addr),
+def t2STREX : Thumb2I<(outs rGPR:$Rd), (ins rGPR:$Rt,
+ t2addrmode_imm0_1020s4:$addr),
AddrModeNone, 4, NoItinerary,
"strex", "\t$Rd, $Rt, $addr", "",
[]> {
- let Inst{31-27} = 0b11101;
- let Inst{26-20} = 0b0000100;
- let Inst{7-0} = 0b00000000; // imm8 = 0
-
bits<4> Rd;
- bits<4> addr;
bits<4> Rt;
- let Inst{11-8} = Rd;
- let Inst{19-16} = addr;
+ bits<12> addr;
+ let Inst{31-27} = 0b11101;
+ let Inst{26-20} = 0b0000100;
+ let Inst{19-16} = addr{11-8};
let Inst{15-12} = Rt;
+ let Inst{11-8} = Rd;
+ let Inst{7-0} = addr{7-0};
}
}
let hasExtraSrcRegAllocReq = 1, Constraints = "@earlyclobber $Rd" in
def t2STREXD : T2I_strex<0b11, (outs rGPR:$Rd),
- (ins rGPR:$Rt, rGPR:$Rt2, t2addrmode_reg:$addr),
+ (ins rGPR:$Rt, rGPR:$Rt2, addr_offset_none:$addr),
AddrModeNone, 4, NoItinerary,
"strexd", "\t$Rd, $Rt, $Rt2, $addr", "", [],
{?, ?, ?, ?}> {
@@ -2924,9 +3059,7 @@ def t2STREXD : T2I_strex<0b11, (outs rGPR:$Rd),
let Inst{11-8} = Rt2;
}
-// Clear-Exclusive is for disassembly only.
-def t2CLREX : T2XI<(outs), (ins), NoItinerary, "clrex",
- [/* For disassembly only; pattern left blank */]>,
+def t2CLREX : T2I<(outs), (ins), NoItinerary, "clrex", "", []>,
Requires<[IsThumb2, HasV7]> {
let Inst{31-16} = 0xf3bf;
let Inst{15-14} = 0b10;
@@ -2986,8 +3119,8 @@ def t2LDMIA_RET: t2PseudoExpand<(outs GPR:$wb), (ins GPR:$Rn, pred:$p,
let isBranch = 1, isTerminator = 1, isBarrier = 1 in {
let isPredicable = 1 in
-def t2B : T2XI<(outs), (ins uncondbrtarget:$target), IIC_Br,
- "b.w\t$target",
+def t2B : T2I<(outs), (ins uncondbrtarget:$target), IIC_Br,
+ "b", ".w\t$target",
[(br bb:$target)]> {
let Inst{31-27} = 0b11110;
let Inst{15-14} = 0b10;
@@ -3009,15 +3142,13 @@ def t2BR_JT : t2PseudoInst<(outs),
// FIXME: Add a non-pc based case that can be predicated.
def t2TBB_JT : t2PseudoInst<(outs),
- (ins GPR:$index, i32imm:$jt, i32imm:$id),
- 0, IIC_Br, []>;
+ (ins GPR:$index, i32imm:$jt, i32imm:$id), 0, IIC_Br, []>;
def t2TBH_JT : t2PseudoInst<(outs),
- (ins GPR:$index, i32imm:$jt, i32imm:$id),
- 0, IIC_Br, []>;
+ (ins GPR:$index, i32imm:$jt, i32imm:$id), 0, IIC_Br, []>;
-def t2TBB : T2I<(outs), (ins GPR:$Rn, GPR:$Rm), IIC_Br,
- "tbb", "\t[$Rn, $Rm]", []> {
+def t2TBB : T2I<(outs), (ins addrmode_tbb:$addr), IIC_Br,
+ "tbb", "\t$addr", []> {
bits<4> Rn;
bits<4> Rm;
let Inst{31-20} = 0b111010001101;
@@ -3025,10 +3156,12 @@ def t2TBB : T2I<(outs), (ins GPR:$Rn, GPR:$Rm), IIC_Br,
let Inst{15-5} = 0b11110000000;
let Inst{4} = 0; // B form
let Inst{3-0} = Rm;
+
+ let DecoderMethod = "DecodeThumbTableBranch";
}
-def t2TBH : T2I<(outs), (ins GPR:$Rn, GPR:$Rm), IIC_Br,
- "tbh", "\t[$Rn, $Rm, lsl #1]", []> {
+def t2TBH : T2I<(outs), (ins addrmode_tbh:$addr), IIC_Br,
+ "tbh", "\t$addr", []> {
bits<4> Rn;
bits<4> Rm;
let Inst{31-20} = 0b111010001101;
@@ -3036,13 +3169,15 @@ def t2TBH : T2I<(outs), (ins GPR:$Rn, GPR:$Rm), IIC_Br,
let Inst{15-5} = 0b11110000000;
let Inst{4} = 1; // H form
let Inst{3-0} = Rm;
+
+ let DecoderMethod = "DecodeThumbTableBranch";
}
} // isNotDuplicable, isIndirectBranch
} // isBranch, isTerminator, isBarrier
// FIXME: should be able to write a pattern for ARMBrcond, but can't use
-// a two-value operand where a dag node expects two operands. :(
+// a two-value operand where a dag node expects ", "two operands. :(
let isBranch = 1, isTerminator = 1 in
def t2Bcc : T2I<(outs), (ins brtarget:$target), IIC_Br,
"b", ".w\t$target",
@@ -3060,6 +3195,8 @@ def t2Bcc : T2I<(outs), (ins brtarget:$target), IIC_Br,
let Inst{13} = target{18};
let Inst{21-16} = target{17-12};
let Inst{10-0} = target{11-1};
+
+ let DecoderMethod = "DecodeThumb2BCCInstruction";
}
// Tail calls. The Darwin version of thumb tail calls uses a t2 branch, so
@@ -3068,9 +3205,10 @@ let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1 in {
// Darwin version.
let Defs = [R0, R1, R2, R3, R9, R12, QQQQ0, QQQQ2, QQQQ3, PC],
Uses = [SP] in
- def tTAILJMPd: tPseudoExpand<(outs), (ins uncondbrtarget:$dst, variable_ops),
+ def tTAILJMPd: tPseudoExpand<(outs),
+ (ins uncondbrtarget:$dst, pred:$p, variable_ops),
4, IIC_Br, [],
- (t2B uncondbrtarget:$dst)>,
+ (t2B uncondbrtarget:$dst, pred:$p)>,
Requires<[IsThumb2, IsDarwin]>;
}
@@ -3087,30 +3225,55 @@ def t2IT : Thumb2XI<(outs), (ins it_pred:$cc, it_mask:$mask),
bits<4> mask;
let Inst{7-4} = cc;
let Inst{3-0} = mask;
+
+ let DecoderMethod = "DecodeIT";
}
// Branch and Exchange Jazelle -- for disassembly only
// Rm = Inst{19-16}
-def t2BXJ : T2I<(outs), (ins rGPR:$func), NoItinerary, "bxj", "\t$func",
- [/* For disassembly only; pattern left blank */]> {
+def t2BXJ : T2I<(outs), (ins rGPR:$func), NoItinerary, "bxj", "\t$func", []> {
+ bits<4> func;
let Inst{31-27} = 0b11110;
let Inst{26} = 0;
let Inst{25-20} = 0b111100;
- let Inst{15-14} = 0b10;
- let Inst{12} = 0;
-
- bits<4> func;
let Inst{19-16} = func;
+ let Inst{15-0} = 0b1000111100000000;
+}
+
+// Compare and branch on zero / non-zero
+let isBranch = 1, isTerminator = 1 in {
+ def tCBZ : T1I<(outs), (ins tGPR:$Rn, t_cbtarget:$target), IIC_Br,
+ "cbz\t$Rn, $target", []>,
+ T1Misc<{0,0,?,1,?,?,?}>,
+ Requires<[IsThumb2]> {
+ // A8.6.27
+ bits<6> target;
+ bits<3> Rn;
+ let Inst{9} = target{5};
+ let Inst{7-3} = target{4-0};
+ let Inst{2-0} = Rn;
+ }
+
+ def tCBNZ : T1I<(outs), (ins tGPR:$Rn, t_cbtarget:$target), IIC_Br,
+ "cbnz\t$Rn, $target", []>,
+ T1Misc<{1,0,?,1,?,?,?}>,
+ Requires<[IsThumb2]> {
+ // A8.6.27
+ bits<6> target;
+ bits<3> Rn;
+ let Inst{9} = target{5};
+ let Inst{7-3} = target{4-0};
+ let Inst{2-0} = Rn;
+ }
}
-// Change Processor State is a system instruction -- for disassembly and
-// parsing only.
+
+// Change Processor State is a system instruction.
// FIXME: Since the asm parser has currently no clean way to handle optional
// operands, create 3 versions of the same instruction. Once there's a clean
// framework to represent optional operands, change this behavior.
class t2CPS<dag iops, string asm_op> : T2XI<(outs), iops, NoItinerary,
- !strconcat("cps", asm_op),
- [/* For disassembly only; pattern left blank */]> {
+ !strconcat("cps", asm_op), []> {
bits<2> imod;
bits<3> iflags;
bits<5> mode;
@@ -3126,6 +3289,7 @@ class t2CPS<dag iops, string asm_op> : T2XI<(outs), iops, NoItinerary,
let Inst{8} = M;
let Inst{7-5} = iflags;
let Inst{4-0} = mode;
+ let DecoderMethod = "DecodeT2CPSInstruction";
}
let M = 1 in
@@ -3135,14 +3299,12 @@ let mode = 0, M = 0 in
def t2CPS2p : t2CPS<(ins imod_op:$imod, iflags_op:$iflags),
"$imod.w\t$iflags">;
let imod = 0, iflags = 0, M = 1 in
- def t2CPS1p : t2CPS<(ins i32imm:$mode), "\t$mode">;
+ def t2CPS1p : t2CPS<(ins imm0_31:$mode), "\t$mode">;
// A6.3.4 Branches and miscellaneous control
// Table A6-14 Change Processor State, and hint instructions
-// Helper class for disassembly only.
class T2I_hint<bits<8> op7_0, string opc, string asm>
- : T2I<(outs), (ins), NoItinerary, opc, asm,
- [/* For disassembly only; pattern left blank */]> {
+ : T2I<(outs), (ins), NoItinerary, opc, asm, []> {
let Inst{31-20} = 0xf3a;
let Inst{19-16} = 0b1111;
let Inst{15-14} = 0b10;
@@ -3158,20 +3320,17 @@ def t2WFI : T2I_hint<0b00000011, "wfi", ".w">;
def t2SEV : T2I_hint<0b00000100, "sev", ".w">;
def t2DBG : T2I<(outs), (ins imm0_15:$opt), NoItinerary, "dbg", "\t$opt", []> {
- let Inst{31-20} = 0xf3a;
- let Inst{15-14} = 0b10;
- let Inst{12} = 0;
- let Inst{10-8} = 0b000;
- let Inst{7-4} = 0b1111;
-
bits<4> opt;
+ let Inst{31-20} = 0b111100111010;
+ let Inst{19-16} = 0b1111;
+ let Inst{15-8} = 0b10000000;
+ let Inst{7-4} = 0b1111;
let Inst{3-0} = opt;
}
-// Secure Monitor Call is a system instruction -- for disassembly only
+// Secure Monitor Call is a system instruction.
// Option = Inst{19-16}
-def t2SMC : T2I<(outs), (ins i32imm:$opt), NoItinerary, "smc", "\t$opt",
- [/* For disassembly only; pattern left blank */]> {
+def t2SMC : T2I<(outs), (ins imm0_15:$opt), NoItinerary, "smc", "\t$opt", []> {
let Inst{31-27} = 0b11110;
let Inst{26-20} = 0b1111111;
let Inst{15-12} = 0b1000;
@@ -3180,32 +3339,30 @@ def t2SMC : T2I<(outs), (ins i32imm:$opt), NoItinerary, "smc", "\t$opt",
let Inst{19-16} = opt;
}
-class T2SRS<bits<12> op31_20,
- dag oops, dag iops, InstrItinClass itin,
- string opc, string asm, list<dag> pattern>
+class T2SRS<bits<2> Op, bit W, dag oops, dag iops, InstrItinClass itin,
+ string opc, string asm, list<dag> pattern>
: T2I<oops, iops, itin, opc, asm, pattern> {
- let Inst{31-20} = op31_20{11-0};
-
bits<5> mode;
+ let Inst{31-25} = 0b1110100;
+ let Inst{24-23} = Op;
+ let Inst{22} = 0;
+ let Inst{21} = W;
+ let Inst{20-16} = 0b01101;
+ let Inst{15-5} = 0b11000000000;
let Inst{4-0} = mode{4-0};
}
-// Store Return State is a system instruction -- for disassembly only
-def t2SRSDBW : T2SRS<0b111010000010,
- (outs),(ins i32imm:$mode),NoItinerary,"srsdb","\tsp!, $mode",
- [/* For disassembly only; pattern left blank */]>;
-def t2SRSDB : T2SRS<0b111010000000,
- (outs),(ins i32imm:$mode),NoItinerary,"srsdb","\tsp, $mode",
- [/* For disassembly only; pattern left blank */]>;
-def t2SRSIAW : T2SRS<0b111010011010,
- (outs),(ins i32imm:$mode),NoItinerary,"srsia","\tsp!, $mode",
- [/* For disassembly only; pattern left blank */]>;
-def t2SRSIA : T2SRS<0b111010011000,
- (outs), (ins i32imm:$mode),NoItinerary,"srsia","\tsp, $mode",
- [/* For disassembly only; pattern left blank */]>;
-
-// Return From Exception is a system instruction -- for disassembly only
+// Store Return State is a system instruction.
+def t2SRSDB_UPD : T2SRS<0b00, 1, (outs), (ins imm0_31:$mode), NoItinerary,
+ "srsdb", "\tsp!, $mode", []>;
+def t2SRSDB : T2SRS<0b00, 0, (outs), (ins imm0_31:$mode), NoItinerary,
+ "srsdb","\tsp, $mode", []>;
+def t2SRSIA_UPD : T2SRS<0b11, 1, (outs), (ins imm0_31:$mode), NoItinerary,
+ "srsia","\tsp!, $mode", []>;
+def t2SRSIA : T2SRS<0b11, 0, (outs), (ins imm0_31:$mode), NoItinerary,
+ "srsia","\tsp, $mode", []>;
+// Return From Exception is a system instruction.
class T2RFE<bits<12> op31_20, dag oops, dag iops, InstrItinClass itin,
string opc, string asm, list<dag> pattern>
: T2I<oops, iops, itin, opc, asm, pattern> {
@@ -3277,53 +3434,186 @@ def t2LDRpci_pic : PseudoInst<(outs rGPR:$dst), (ins i32imm:$addr, pclabel:$cp),
imm:$cp))]>,
Requires<[IsThumb2]>;
+// Pseudo isntruction that combines movs + predicated rsbmi
+// to implement integer ABS
+let usesCustomInserter = 1, Defs = [CPSR] in {
+def t2ABS : PseudoInst<(outs rGPR:$dst), (ins rGPR:$src),
+ NoItinerary, []>, Requires<[IsThumb2]>;
+}
+
+//===----------------------------------------------------------------------===//
+// Coprocessor load/store -- for disassembly only
+//
+class T2CI<bits<4> op31_28, dag oops, dag iops, string opc, string asm>
+ : T2I<oops, iops, NoItinerary, opc, asm, []> {
+ let Inst{31-28} = op31_28;
+ let Inst{27-25} = 0b110;
+}
+
+multiclass t2LdStCop<bits<4> op31_28, bit load, bit Dbit, string asm> {
+ def _OFFSET : T2CI<op31_28,
+ (outs), (ins p_imm:$cop, c_imm:$CRd, addrmode5:$addr),
+ asm, "\t$cop, $CRd, $addr"> {
+ bits<13> addr;
+ bits<4> cop;
+ bits<4> CRd;
+ let Inst{24} = 1; // P = 1
+ let Inst{23} = addr{8};
+ let Inst{22} = Dbit;
+ let Inst{21} = 0; // W = 0
+ let Inst{20} = load;
+ let Inst{19-16} = addr{12-9};
+ let Inst{15-12} = CRd;
+ let Inst{11-8} = cop;
+ let Inst{7-0} = addr{7-0};
+ let DecoderMethod = "DecodeCopMemInstruction";
+ }
+ def _PRE : T2CI<op31_28,
+ (outs), (ins p_imm:$cop, c_imm:$CRd, addrmode5:$addr),
+ asm, "\t$cop, $CRd, $addr!"> {
+ bits<13> addr;
+ bits<4> cop;
+ bits<4> CRd;
+ let Inst{24} = 1; // P = 1
+ let Inst{23} = addr{8};
+ let Inst{22} = Dbit;
+ let Inst{21} = 1; // W = 1
+ let Inst{20} = load;
+ let Inst{19-16} = addr{12-9};
+ let Inst{15-12} = CRd;
+ let Inst{11-8} = cop;
+ let Inst{7-0} = addr{7-0};
+ let DecoderMethod = "DecodeCopMemInstruction";
+ }
+ def _POST: T2CI<op31_28,
+ (outs), (ins p_imm:$cop, c_imm:$CRd, addr_offset_none:$addr,
+ postidx_imm8s4:$offset),
+ asm, "\t$cop, $CRd, $addr, $offset"> {
+ bits<9> offset;
+ bits<4> addr;
+ bits<4> cop;
+ bits<4> CRd;
+ let Inst{24} = 0; // P = 0
+ let Inst{23} = offset{8};
+ let Inst{22} = Dbit;
+ let Inst{21} = 1; // W = 1
+ let Inst{20} = load;
+ let Inst{19-16} = addr;
+ let Inst{15-12} = CRd;
+ let Inst{11-8} = cop;
+ let Inst{7-0} = offset{7-0};
+ let DecoderMethod = "DecodeCopMemInstruction";
+ }
+ def _OPTION : T2CI<op31_28, (outs),
+ (ins p_imm:$cop, c_imm:$CRd, addr_offset_none:$addr,
+ coproc_option_imm:$option),
+ asm, "\t$cop, $CRd, $addr, $option"> {
+ bits<8> option;
+ bits<4> addr;
+ bits<4> cop;
+ bits<4> CRd;
+ let Inst{24} = 0; // P = 0
+ let Inst{23} = 1; // U = 1
+ let Inst{22} = Dbit;
+ let Inst{21} = 0; // W = 0
+ let Inst{20} = load;
+ let Inst{19-16} = addr;
+ let Inst{15-12} = CRd;
+ let Inst{11-8} = cop;
+ let Inst{7-0} = option;
+ let DecoderMethod = "DecodeCopMemInstruction";
+ }
+}
+
+defm t2LDC : t2LdStCop<0b1110, 1, 0, "ldc">;
+defm t2LDCL : t2LdStCop<0b1110, 1, 1, "ldcl">;
+defm t2STC : t2LdStCop<0b1110, 0, 0, "stc">;
+defm t2STCL : t2LdStCop<0b1110, 0, 1, "stcl">;
+defm t2LDC2 : t2LdStCop<0b1111, 1, 0, "ldc2">;
+defm t2LDC2L : t2LdStCop<0b1111, 1, 1, "ldc2l">;
+defm t2STC2 : t2LdStCop<0b1111, 0, 0, "stc2">;
+defm t2STC2L : t2LdStCop<0b1111, 0, 1, "stc2l">;
+
+
//===----------------------------------------------------------------------===//
// Move between special register and ARM core register -- for disassembly only
//
+// Move to ARM core register from Special Register
-class T2SpecialReg<bits<12> op31_20, bits<2> op15_14, bits<1> op12,
- dag oops, dag iops, InstrItinClass itin,
- string opc, string asm, list<dag> pattern>
- : T2I<oops, iops, itin, opc, asm, pattern> {
- let Inst{31-20} = op31_20{11-0};
- let Inst{15-14} = op15_14{1-0};
- let Inst{12} = op12{0};
+// A/R class MRS.
+//
+// A/R class can only move from CPSR or SPSR.
+def t2MRS_AR : T2I<(outs GPR:$Rd), (ins), NoItinerary, "mrs", "\t$Rd, apsr", []>,
+ Requires<[IsThumb2,IsARClass]> {
+ bits<4> Rd;
+ let Inst{31-12} = 0b11110011111011111000;
+ let Inst{11-8} = Rd;
+ let Inst{7-0} = 0b0000;
}
-class T2MRS<bits<12> op31_20, bits<2> op15_14, bits<1> op12,
- dag oops, dag iops, InstrItinClass itin,
- string opc, string asm, list<dag> pattern>
- : T2SpecialReg<op31_20, op15_14, op12, oops, iops, itin, opc, asm, pattern> {
+def : t2InstAlias<"mrs${p} $Rd, cpsr", (t2MRS_AR GPR:$Rd, pred:$p)>;
+
+def t2MRSsys_AR: T2I<(outs GPR:$Rd), (ins), NoItinerary, "mrs", "\t$Rd, spsr", []>,
+ Requires<[IsThumb2,IsARClass]> {
bits<4> Rd;
+ let Inst{31-12} = 0b11110011111111111000;
+ let Inst{11-8} = Rd;
+ let Inst{7-0} = 0b0000;
+}
+
+// M class MRS.
+//
+// This MRS has a mask field in bits 7-0 and can take more values than
+// the A/R class (a full msr_mask).
+def t2MRS_M : T2I<(outs rGPR:$Rd), (ins msr_mask:$mask), NoItinerary,
+ "mrs", "\t$Rd, $mask", []>,
+ Requires<[IsThumb2,IsMClass]> {
+ bits<4> Rd;
+ bits<8> mask;
+ let Inst{31-12} = 0b11110011111011111000;
let Inst{11-8} = Rd;
let Inst{19-16} = 0b1111;
+ let Inst{7-0} = mask;
}
-def t2MRS : T2MRS<0b111100111110, 0b10, 0,
- (outs rGPR:$Rd), (ins), NoItinerary, "mrs", "\t$Rd, cpsr",
- [/* For disassembly only; pattern left blank */]>;
-def t2MRSsys : T2MRS<0b111100111111, 0b10, 0,
- (outs rGPR:$Rd), (ins), NoItinerary, "mrs", "\t$Rd, spsr",
- [/* For disassembly only; pattern left blank */]>;
// Move from ARM core register to Special Register
//
+// A/R class MSR.
+//
// No need to have both system and application versions, the encodings are the
// same and the assembly parser has no way to distinguish between them. The mask
// operand contains the special register (R Bit) in bit 4 and bits 3-0 contains
// the mask with the fields to be accessed in the special register.
-def t2MSR : T2SpecialReg<0b111100111000 /* op31-20 */, 0b10 /* op15-14 */,
- 0 /* op12 */, (outs), (ins msr_mask:$mask, rGPR:$Rn),
- NoItinerary, "msr", "\t$mask, $Rn",
- [/* For disassembly only; pattern left blank */]> {
+def t2MSR_AR : T2I<(outs), (ins msr_mask:$mask, rGPR:$Rn),
+ NoItinerary, "msr", "\t$mask, $Rn", []>,
+ Requires<[IsThumb2,IsARClass]> {
bits<5> mask;
bits<4> Rn;
- let Inst{19-16} = Rn;
+ let Inst{31-21} = 0b11110011100;
let Inst{20} = mask{4}; // R Bit
- let Inst{13} = 0b0;
+ let Inst{19-16} = Rn;
+ let Inst{15-12} = 0b1000;
let Inst{11-8} = mask{3-0};
+ let Inst{7-0} = 0;
}
+// M class MSR.
+//
+// Move from ARM core register to Special Register
+def t2MSR_M : T2I<(outs), (ins msr_mask:$SYSm, rGPR:$Rn),
+ NoItinerary, "msr", "\t$SYSm, $Rn", []>,
+ Requires<[IsThumb2,IsMClass]> {
+ bits<8> SYSm;
+ bits<4> Rn;
+ let Inst{31-21} = 0b11110011100;
+ let Inst{20} = 0b0;
+ let Inst{19-16} = Rn;
+ let Inst{15-12} = 0b1000;
+ let Inst{7-0} = SYSm;
+}
+
+
//===----------------------------------------------------------------------===//
// Move between coprocessor and ARM core register
//
@@ -3389,13 +3679,12 @@ def t2MCR2 : t2MovRCopro<0b1111, "mcr2", 0,
/* from coprocessor to ARM core register */
def t2MRC : t2MovRCopro<0b1110, "mrc", 1,
- (outs GPR:$Rt),
- (ins p_imm:$cop, i32imm:$opc1, c_imm:$CRn, c_imm:$CRm, i32imm:$opc2),
- []>;
+ (outs GPR:$Rt), (ins p_imm:$cop, imm0_7:$opc1, c_imm:$CRn,
+ c_imm:$CRm, imm0_7:$opc2), []>;
def t2MRC2 : t2MovRCopro<0b1111, "mrc2", 1,
- (outs GPR:$Rt), (ins p_imm:$cop, i32imm:$opc1, c_imm:$CRn,
- c_imm:$CRm, i32imm:$opc2), []>;
+ (outs GPR:$Rt), (ins p_imm:$cop, imm0_7:$opc1, c_imm:$CRn,
+ c_imm:$CRm, imm0_7:$opc2), []>;
def : T2v6Pat<(int_arm_mrc imm:$cop, imm:$opc1, imm:$CRn, imm:$CRm, imm:$opc2),
(t2MRC imm:$cop, imm:$opc1, imm:$CRn, imm:$CRm, imm:$opc2)>;
@@ -3465,3 +3754,269 @@ def t2CDP2 : T2Cop<0b1111, (outs), (ins p_imm:$cop, imm0_15:$opc1,
let Inst{19-16} = CRn;
let Inst{23-20} = opc1;
}
+
+
+
+//===----------------------------------------------------------------------===//
+// Non-Instruction Patterns
+//
+
+// SXT/UXT with no rotate
+let AddedComplexity = 16 in {
+def : T2Pat<(and rGPR:$Rm, 0x000000FF), (t2UXTB rGPR:$Rm, 0)>,
+ Requires<[IsThumb2]>;
+def : T2Pat<(and rGPR:$Rm, 0x0000FFFF), (t2UXTH rGPR:$Rm, 0)>,
+ Requires<[IsThumb2]>;
+def : T2Pat<(and rGPR:$Rm, 0x00FF00FF), (t2UXTB16 rGPR:$Rm, 0)>,
+ Requires<[HasT2ExtractPack, IsThumb2]>;
+def : T2Pat<(add rGPR:$Rn, (and rGPR:$Rm, 0x00FF)),
+ (t2UXTAB rGPR:$Rn, rGPR:$Rm, 0)>,
+ Requires<[HasT2ExtractPack, IsThumb2]>;
+def : T2Pat<(add rGPR:$Rn, (and rGPR:$Rm, 0xFFFF)),
+ (t2UXTAH rGPR:$Rn, rGPR:$Rm, 0)>,
+ Requires<[HasT2ExtractPack, IsThumb2]>;
+}
+
+def : T2Pat<(sext_inreg rGPR:$Src, i8), (t2SXTB rGPR:$Src, 0)>,
+ Requires<[IsThumb2]>;
+def : T2Pat<(sext_inreg rGPR:$Src, i16), (t2SXTH rGPR:$Src, 0)>,
+ Requires<[IsThumb2]>;
+def : T2Pat<(add rGPR:$Rn, (sext_inreg rGPR:$Rm, i8)),
+ (t2SXTAB rGPR:$Rn, rGPR:$Rm, 0)>,
+ Requires<[HasT2ExtractPack, IsThumb2]>;
+def : T2Pat<(add rGPR:$Rn, (sext_inreg rGPR:$Rm, i16)),
+ (t2SXTAH rGPR:$Rn, rGPR:$Rm, 0)>,
+ Requires<[HasT2ExtractPack, IsThumb2]>;
+
+// Atomic load/store patterns
+def : T2Pat<(atomic_load_8 t2addrmode_imm12:$addr),
+ (t2LDRBi12 t2addrmode_imm12:$addr)>;
+def : T2Pat<(atomic_load_8 t2addrmode_negimm8:$addr),
+ (t2LDRBi8 t2addrmode_negimm8:$addr)>;
+def : T2Pat<(atomic_load_8 t2addrmode_so_reg:$addr),
+ (t2LDRBs t2addrmode_so_reg:$addr)>;
+def : T2Pat<(atomic_load_16 t2addrmode_imm12:$addr),
+ (t2LDRHi12 t2addrmode_imm12:$addr)>;
+def : T2Pat<(atomic_load_16 t2addrmode_negimm8:$addr),
+ (t2LDRHi8 t2addrmode_negimm8:$addr)>;
+def : T2Pat<(atomic_load_16 t2addrmode_so_reg:$addr),
+ (t2LDRHs t2addrmode_so_reg:$addr)>;
+def : T2Pat<(atomic_load_32 t2addrmode_imm12:$addr),
+ (t2LDRi12 t2addrmode_imm12:$addr)>;
+def : T2Pat<(atomic_load_32 t2addrmode_negimm8:$addr),
+ (t2LDRi8 t2addrmode_negimm8:$addr)>;
+def : T2Pat<(atomic_load_32 t2addrmode_so_reg:$addr),
+ (t2LDRs t2addrmode_so_reg:$addr)>;
+def : T2Pat<(atomic_store_8 t2addrmode_imm12:$addr, GPR:$val),
+ (t2STRBi12 GPR:$val, t2addrmode_imm12:$addr)>;
+def : T2Pat<(atomic_store_8 t2addrmode_negimm8:$addr, GPR:$val),
+ (t2STRBi8 GPR:$val, t2addrmode_negimm8:$addr)>;
+def : T2Pat<(atomic_store_8 t2addrmode_so_reg:$addr, GPR:$val),
+ (t2STRBs GPR:$val, t2addrmode_so_reg:$addr)>;
+def : T2Pat<(atomic_store_16 t2addrmode_imm12:$addr, GPR:$val),
+ (t2STRHi12 GPR:$val, t2addrmode_imm12:$addr)>;
+def : T2Pat<(atomic_store_16 t2addrmode_negimm8:$addr, GPR:$val),
+ (t2STRHi8 GPR:$val, t2addrmode_negimm8:$addr)>;
+def : T2Pat<(atomic_store_16 t2addrmode_so_reg:$addr, GPR:$val),
+ (t2STRHs GPR:$val, t2addrmode_so_reg:$addr)>;
+def : T2Pat<(atomic_store_32 t2addrmode_imm12:$addr, GPR:$val),
+ (t2STRi12 GPR:$val, t2addrmode_imm12:$addr)>;
+def : T2Pat<(atomic_store_32 t2addrmode_negimm8:$addr, GPR:$val),
+ (t2STRi8 GPR:$val, t2addrmode_negimm8:$addr)>;
+def : T2Pat<(atomic_store_32 t2addrmode_so_reg:$addr, GPR:$val),
+ (t2STRs GPR:$val, t2addrmode_so_reg:$addr)>;
+
+
+//===----------------------------------------------------------------------===//
+// Assembler aliases
+//
+
+// Aliases for ADC without the ".w" optional width specifier.
+def : t2InstAlias<"adc${s}${p} $Rd, $Rn, $Rm",
+ (t2ADCrr rGPR:$Rd, rGPR:$Rn, rGPR:$Rm, pred:$p, cc_out:$s)>;
+def : t2InstAlias<"adc${s}${p} $Rd, $Rn, $ShiftedRm",
+ (t2ADCrs rGPR:$Rd, rGPR:$Rn, t2_so_reg:$ShiftedRm,
+ pred:$p, cc_out:$s)>;
+
+// Aliases for SBC without the ".w" optional width specifier.
+def : t2InstAlias<"sbc${s}${p} $Rd, $Rn, $Rm",
+ (t2SBCrr rGPR:$Rd, rGPR:$Rn, rGPR:$Rm, pred:$p, cc_out:$s)>;
+def : t2InstAlias<"sbc${s}${p} $Rd, $Rn, $ShiftedRm",
+ (t2SBCrs rGPR:$Rd, rGPR:$Rn, t2_so_reg:$ShiftedRm,
+ pred:$p, cc_out:$s)>;
+
+// Aliases for ADD without the ".w" optional width specifier.
+def : t2InstAlias<"add${s}${p} $Rd, $Rn, $imm",
+ (t2ADDri GPRnopc:$Rd, GPRnopc:$Rn, t2_so_imm:$imm, pred:$p, cc_out:$s)>;
+def : t2InstAlias<"add${p} $Rd, $Rn, $imm",
+ (t2ADDri12 GPRnopc:$Rd, GPR:$Rn, imm0_4095:$imm, pred:$p)>;
+def : t2InstAlias<"add${s}${p} $Rd, $Rn, $Rm",
+ (t2ADDrr GPRnopc:$Rd, GPRnopc:$Rn, rGPR:$Rm, pred:$p, cc_out:$s)>;
+def : t2InstAlias<"add${s}${p} $Rd, $Rn, $ShiftedRm",
+ (t2ADDrs GPRnopc:$Rd, GPRnopc:$Rn, t2_so_reg:$ShiftedRm,
+ pred:$p, cc_out:$s)>;
+
+// Aliases for SUB without the ".w" optional width specifier.
+def : t2InstAlias<"sub${s}${p} $Rd, $Rn, $imm",
+ (t2SUBri GPRnopc:$Rd, GPRnopc:$Rn, t2_so_imm:$imm, pred:$p, cc_out:$s)>;
+def : t2InstAlias<"sub${p} $Rd, $Rn, $imm",
+ (t2SUBri12 GPRnopc:$Rd, GPR:$Rn, imm0_4095:$imm, pred:$p)>;
+def : t2InstAlias<"sub${s}${p} $Rd, $Rn, $Rm",
+ (t2SUBrr GPRnopc:$Rd, GPRnopc:$Rn, rGPR:$Rm, pred:$p, cc_out:$s)>;
+def : t2InstAlias<"sub${s}${p} $Rd, $Rn, $ShiftedRm",
+ (t2SUBrs GPRnopc:$Rd, GPRnopc:$Rn, t2_so_reg:$ShiftedRm,
+ pred:$p, cc_out:$s)>;
+
+// Alias for compares without the ".w" optional width specifier.
+def : t2InstAlias<"cmn${p} $Rn, $Rm",
+ (t2CMNzrr GPRnopc:$Rn, rGPR:$Rm, pred:$p)>;
+def : t2InstAlias<"teq${p} $Rn, $Rm",
+ (t2TEQrr GPRnopc:$Rn, rGPR:$Rm, pred:$p)>;
+def : t2InstAlias<"tst${p} $Rn, $Rm",
+ (t2TSTrr GPRnopc:$Rn, rGPR:$Rm, pred:$p)>;
+
+// Memory barriers
+def : InstAlias<"dmb", (t2DMB 0xf)>, Requires<[IsThumb2, HasDB]>;
+def : InstAlias<"dsb", (t2DSB 0xf)>, Requires<[IsThumb2, HasDB]>;
+def : InstAlias<"isb", (t2ISB 0xf)>, Requires<[IsThumb2, HasDB]>;
+
+// Alias for LDR, LDRB, LDRH, LDRSB, and LDRSH without the ".w" optional
+// width specifier.
+def : t2InstAlias<"ldr${p} $Rt, $addr",
+ (t2LDRi12 GPR:$Rt, t2addrmode_imm12:$addr, pred:$p)>;
+def : t2InstAlias<"ldrb${p} $Rt, $addr",
+ (t2LDRBi12 rGPR:$Rt, t2addrmode_imm12:$addr, pred:$p)>;
+def : t2InstAlias<"ldrh${p} $Rt, $addr",
+ (t2LDRHi12 rGPR:$Rt, t2addrmode_imm12:$addr, pred:$p)>;
+def : t2InstAlias<"ldrsb${p} $Rt, $addr",
+ (t2LDRSBi12 rGPR:$Rt, t2addrmode_imm12:$addr, pred:$p)>;
+def : t2InstAlias<"ldrsh${p} $Rt, $addr",
+ (t2LDRSHi12 rGPR:$Rt, t2addrmode_imm12:$addr, pred:$p)>;
+
+def : t2InstAlias<"ldr${p} $Rt, $addr",
+ (t2LDRs GPR:$Rt, t2addrmode_so_reg:$addr, pred:$p)>;
+def : t2InstAlias<"ldrb${p} $Rt, $addr",
+ (t2LDRBs rGPR:$Rt, t2addrmode_so_reg:$addr, pred:$p)>;
+def : t2InstAlias<"ldrh${p} $Rt, $addr",
+ (t2LDRHs rGPR:$Rt, t2addrmode_so_reg:$addr, pred:$p)>;
+def : t2InstAlias<"ldrsb${p} $Rt, $addr",
+ (t2LDRSBs rGPR:$Rt, t2addrmode_so_reg:$addr, pred:$p)>;
+def : t2InstAlias<"ldrsh${p} $Rt, $addr",
+ (t2LDRSHs rGPR:$Rt, t2addrmode_so_reg:$addr, pred:$p)>;
+
+// Alias for MVN without the ".w" optional width specifier.
+def : t2InstAlias<"mvn${s}${p} $Rd, $Rm",
+ (t2MVNr rGPR:$Rd, rGPR:$Rm, pred:$p, cc_out:$s)>;
+def : t2InstAlias<"mvn${s}${p} $Rd, $ShiftedRm",
+ (t2MVNs rGPR:$Rd, t2_so_reg:$ShiftedRm, pred:$p, cc_out:$s)>;
+
+// PKHBT/PKHTB with default shift amount. PKHTB is equivalent to PKHBT when the
+// shift amount is zero (i.e., unspecified).
+def : InstAlias<"pkhbt${p} $Rd, $Rn, $Rm",
+ (t2PKHBT rGPR:$Rd, rGPR:$Rn, rGPR:$Rm, 0, pred:$p)>,
+ Requires<[HasT2ExtractPack, IsThumb2]>;
+def : InstAlias<"pkhtb${p} $Rd, $Rn, $Rm",
+ (t2PKHBT rGPR:$Rd, rGPR:$Rn, rGPR:$Rm, 0, pred:$p)>,
+ Requires<[HasT2ExtractPack, IsThumb2]>;
+
+// PUSH/POP aliases for STM/LDM
+def : t2InstAlias<"push${p}.w $regs", (t2STMDB_UPD SP, pred:$p, reglist:$regs)>;
+def : t2InstAlias<"push${p} $regs", (t2STMDB_UPD SP, pred:$p, reglist:$regs)>;
+def : t2InstAlias<"pop${p}.w $regs", (t2LDMIA_UPD SP, pred:$p, reglist:$regs)>;
+def : t2InstAlias<"pop${p} $regs", (t2LDMIA_UPD SP, pred:$p, reglist:$regs)>;
+
+// Alias for REV/REV16/REVSH without the ".w" optional width specifier.
+def : t2InstAlias<"rev${p} $Rd, $Rm", (t2REV rGPR:$Rd, rGPR:$Rm, pred:$p)>;
+def : t2InstAlias<"rev16${p} $Rd, $Rm", (t2REV16 rGPR:$Rd, rGPR:$Rm, pred:$p)>;
+def : t2InstAlias<"revsh${p} $Rd, $Rm", (t2REVSH rGPR:$Rd, rGPR:$Rm, pred:$p)>;
+
+
+// Alias for RSB without the ".w" optional width specifier, and with optional
+// implied destination register.
+def : t2InstAlias<"rsb${s}${p} $Rd, $Rn, $imm",
+ (t2RSBri rGPR:$Rd, rGPR:$Rn, t2_so_imm:$imm, pred:$p, cc_out:$s)>;
+def : t2InstAlias<"rsb${s}${p} $Rdn, $imm",
+ (t2RSBri rGPR:$Rdn, rGPR:$Rdn, t2_so_imm:$imm, pred:$p, cc_out:$s)>;
+def : t2InstAlias<"rsb${s}${p} $Rdn, $Rm",
+ (t2RSBrr rGPR:$Rdn, rGPR:$Rdn, rGPR:$Rm, pred:$p, cc_out:$s)>;
+def : t2InstAlias<"rsb${s}${p} $Rdn, $ShiftedRm",
+ (t2RSBrs rGPR:$Rdn, rGPR:$Rdn, t2_so_reg:$ShiftedRm, pred:$p,
+ cc_out:$s)>;
+
+// SSAT/USAT optional shift operand.
+def : t2InstAlias<"ssat${p} $Rd, $sat_imm, $Rn",
+ (t2SSAT rGPR:$Rd, imm1_32:$sat_imm, rGPR:$Rn, 0, pred:$p)>;
+def : t2InstAlias<"usat${p} $Rd, $sat_imm, $Rn",
+ (t2USAT rGPR:$Rd, imm0_31:$sat_imm, rGPR:$Rn, 0, pred:$p)>;
+
+// STM w/o the .w suffix.
+def : t2InstAlias<"stm${p} $Rn, $regs",
+ (t2STMIA GPR:$Rn, pred:$p, reglist:$regs)>;
+
+// Alias for STR, STRB, and STRH without the ".w" optional
+// width specifier.
+def : t2InstAlias<"str${p} $Rt, $addr",
+ (t2STRi12 GPR:$Rt, t2addrmode_imm12:$addr, pred:$p)>;
+def : t2InstAlias<"strb${p} $Rt, $addr",
+ (t2STRBi12 rGPR:$Rt, t2addrmode_imm12:$addr, pred:$p)>;
+def : t2InstAlias<"strh${p} $Rt, $addr",
+ (t2STRHi12 rGPR:$Rt, t2addrmode_imm12:$addr, pred:$p)>;
+
+def : t2InstAlias<"str${p} $Rt, $addr",
+ (t2STRs GPR:$Rt, t2addrmode_so_reg:$addr, pred:$p)>;
+def : t2InstAlias<"strb${p} $Rt, $addr",
+ (t2STRBs rGPR:$Rt, t2addrmode_so_reg:$addr, pred:$p)>;
+def : t2InstAlias<"strh${p} $Rt, $addr",
+ (t2STRHs rGPR:$Rt, t2addrmode_so_reg:$addr, pred:$p)>;
+
+// Extend instruction optional rotate operand.
+def : t2InstAlias<"sxtab${p} $Rd, $Rn, $Rm",
+ (t2SXTAB rGPR:$Rd, rGPR:$Rn, rGPR:$Rm, 0, pred:$p)>;
+def : t2InstAlias<"sxtah${p} $Rd, $Rn, $Rm",
+ (t2SXTAH rGPR:$Rd, rGPR:$Rn, rGPR:$Rm, 0, pred:$p)>;
+def : t2InstAlias<"sxtab16${p} $Rd, $Rn, $Rm",
+ (t2SXTAB16 rGPR:$Rd, rGPR:$Rn, rGPR:$Rm, 0, pred:$p)>;
+
+def : t2InstAlias<"sxtb${p} $Rd, $Rm",
+ (t2SXTB rGPR:$Rd, rGPR:$Rm, 0, pred:$p)>;
+def : t2InstAlias<"sxtb16${p} $Rd, $Rm",
+ (t2SXTB16 rGPR:$Rd, rGPR:$Rm, 0, pred:$p)>;
+def : t2InstAlias<"sxth${p} $Rd, $Rm",
+ (t2SXTH rGPR:$Rd, rGPR:$Rm, 0, pred:$p)>;
+def : t2InstAlias<"sxtb${p}.w $Rd, $Rm",
+ (t2SXTB rGPR:$Rd, rGPR:$Rm, 0, pred:$p)>;
+def : t2InstAlias<"sxth${p}.w $Rd, $Rm",
+ (t2SXTH rGPR:$Rd, rGPR:$Rm, 0, pred:$p)>;
+
+def : t2InstAlias<"uxtab${p} $Rd, $Rn, $Rm",
+ (t2UXTAB rGPR:$Rd, rGPR:$Rn, rGPR:$Rm, 0, pred:$p)>;
+def : t2InstAlias<"uxtah${p} $Rd, $Rn, $Rm",
+ (t2UXTAH rGPR:$Rd, rGPR:$Rn, rGPR:$Rm, 0, pred:$p)>;
+def : t2InstAlias<"uxtab16${p} $Rd, $Rn, $Rm",
+ (t2UXTAB16 rGPR:$Rd, rGPR:$Rn, rGPR:$Rm, 0, pred:$p)>;
+def : t2InstAlias<"uxtb${p} $Rd, $Rm",
+ (t2UXTB rGPR:$Rd, rGPR:$Rm, 0, pred:$p)>;
+def : t2InstAlias<"uxtb16${p} $Rd, $Rm",
+ (t2UXTB16 rGPR:$Rd, rGPR:$Rm, 0, pred:$p)>;
+def : t2InstAlias<"uxth${p} $Rd, $Rm",
+ (t2UXTH rGPR:$Rd, rGPR:$Rm, 0, pred:$p)>;
+
+def : t2InstAlias<"uxtb${p}.w $Rd, $Rm",
+ (t2UXTB rGPR:$Rd, rGPR:$Rm, 0, pred:$p)>;
+def : t2InstAlias<"uxth${p}.w $Rd, $Rm",
+ (t2UXTH rGPR:$Rd, rGPR:$Rm, 0, pred:$p)>;
+
+// Extend instruction w/o the ".w" optional width specifier.
+def : t2InstAlias<"uxtb${p} $Rd, $Rm$rot",
+ (t2UXTB rGPR:$Rd, rGPR:$Rm, rot_imm:$rot, pred:$p)>;
+def : t2InstAlias<"uxtb16${p} $Rd, $Rm$rot",
+ (t2UXTB16 rGPR:$Rd, rGPR:$Rm, rot_imm:$rot, pred:$p)>;
+def : t2InstAlias<"uxth${p} $Rd, $Rm$rot",
+ (t2UXTH rGPR:$Rd, rGPR:$Rm, rot_imm:$rot, pred:$p)>;
+
+def : t2InstAlias<"sxtb${p} $Rd, $Rm$rot",
+ (t2SXTB rGPR:$Rd, rGPR:$Rm, rot_imm:$rot, pred:$p)>;
+def : t2InstAlias<"sxtb16${p} $Rd, $Rm$rot",
+ (t2SXTB16 rGPR:$Rd, rGPR:$Rm, rot_imm:$rot, pred:$p)>;
+def : t2InstAlias<"sxth${p} $Rd, $Rm$rot",
+ (t2SXTH rGPR:$Rd, rGPR:$Rm, rot_imm:$rot, pred:$p)>;
diff --git a/contrib/llvm/lib/Target/ARM/ARMInstrVFP.td b/contrib/llvm/lib/Target/ARM/ARMInstrVFP.td
index f1f3cb9..e746cf2 100644
--- a/contrib/llvm/lib/Target/ARM/ARMInstrVFP.td
+++ b/contrib/llvm/lib/Target/ARM/ARMInstrVFP.td
@@ -31,18 +31,34 @@ def arm_fmdrr : SDNode<"ARMISD::VMOVDRR", SDT_VMOVDRR>;
// Operand Definitions.
//
+// 8-bit floating-point immediate encodings.
+def FPImmOperand : AsmOperandClass {
+ let Name = "FPImm";
+ let ParserMethod = "parseFPImm";
+}
+
def vfp_f32imm : Operand<f32>,
PatLeaf<(f32 fpimm), [{
- return ARM::getVFPf32Imm(N->getValueAPF()) != -1;
- }]> {
- let PrintMethod = "printVFPf32ImmOperand";
+ return ARM_AM::getFP32Imm(N->getValueAPF()) != -1;
+ }], SDNodeXForm<fpimm, [{
+ APFloat InVal = N->getValueAPF();
+ uint32_t enc = ARM_AM::getFP32Imm(InVal);
+ return CurDAG->getTargetConstant(enc, MVT::i32);
+ }]>> {
+ let PrintMethod = "printFPImmOperand";
+ let ParserMatchClass = FPImmOperand;
}
def vfp_f64imm : Operand<f64>,
PatLeaf<(f64 fpimm), [{
- return ARM::getVFPf64Imm(N->getValueAPF()) != -1;
- }]> {
- let PrintMethod = "printVFPf64ImmOperand";
+ return ARM_AM::getFP64Imm(N->getValueAPF()) != -1;
+ }], SDNodeXForm<fpimm, [{
+ APFloat InVal = N->getValueAPF();
+ uint32_t enc = ARM_AM::getFP64Imm(InVal);
+ return CurDAG->getTargetConstant(enc, MVT::i32);
+ }]>> {
+ let PrintMethod = "printFPImmOperand";
+ let ParserMatchClass = FPImmOperand;
}
@@ -385,26 +401,26 @@ def VCVTSD : VFPAI<(outs SPR:$Sd), (ins DPR:$Dm), VFPUnaryFrm,
// Between half-precision and single-precision. For disassembly only.
// FIXME: Verify encoding after integrated assembler is working.
-def VCVTBSH: ASuI<0b11101, 0b11, 0b0010, 0b01, 0, (outs SPR:$dst), (ins SPR:$a),
- /* FIXME */ IIC_fpCVTSH, "vcvtb", ".f32.f16\t$dst, $a",
+def VCVTBSH: ASuI<0b11101, 0b11, 0b0010, 0b01, 0, (outs SPR:$Sd), (ins SPR:$Sm),
+ /* FIXME */ IIC_fpCVTSH, "vcvtb", ".f32.f16\t$Sd, $Sm",
[/* For disassembly only; pattern left blank */]>;
def : ARMPat<(f32_to_f16 SPR:$a),
(i32 (COPY_TO_REGCLASS (VCVTBSH SPR:$a), GPR))>;
-def VCVTBHS: ASuI<0b11101, 0b11, 0b0011, 0b01, 0, (outs SPR:$dst), (ins SPR:$a),
- /* FIXME */ IIC_fpCVTHS, "vcvtb", ".f16.f32\t$dst, $a",
+def VCVTBHS: ASuI<0b11101, 0b11, 0b0011, 0b01, 0, (outs SPR:$Sd), (ins SPR:$Sm),
+ /* FIXME */ IIC_fpCVTHS, "vcvtb", ".f16.f32\t$Sd, $Sm",
[/* For disassembly only; pattern left blank */]>;
def : ARMPat<(f16_to_f32 GPR:$a),
(VCVTBHS (COPY_TO_REGCLASS GPR:$a, SPR))>;
-def VCVTTSH: ASuI<0b11101, 0b11, 0b0010, 0b11, 0, (outs SPR:$dst), (ins SPR:$a),
- /* FIXME */ IIC_fpCVTSH, "vcvtt", ".f32.f16\t$dst, $a",
+def VCVTTSH: ASuI<0b11101, 0b11, 0b0010, 0b11, 0, (outs SPR:$Sd), (ins SPR:$Sm),
+ /* FIXME */ IIC_fpCVTSH, "vcvtt", ".f32.f16\t$Sd, $Sm",
[/* For disassembly only; pattern left blank */]>;
-def VCVTTHS: ASuI<0b11101, 0b11, 0b0011, 0b11, 0, (outs SPR:$dst), (ins SPR:$a),
- /* FIXME */ IIC_fpCVTHS, "vcvtt", ".f16.f32\t$dst, $a",
+def VCVTTHS: ASuI<0b11101, 0b11, 0b0011, 0b11, 0, (outs SPR:$Sd), (ins SPR:$Sm),
+ /* FIXME */ IIC_fpCVTHS, "vcvtt", ".f16.f32\t$Sd, $Sm",
[/* For disassembly only; pattern left blank */]>;
def VNEGD : ADuI<0b11101, 0b11, 0b0001, 0b01, 0,
@@ -511,14 +527,25 @@ def VMOVRRD : AVConv3I<0b11000101, 0b1011,
}
def VMOVRRS : AVConv3I<0b11000101, 0b1010,
- (outs GPR:$wb, GPR:$dst2), (ins SPR:$src1, SPR:$src2),
- IIC_fpMOVDI, "vmov", "\t$wb, $dst2, $src1, $src2",
+ (outs GPR:$Rt, GPR:$Rt2), (ins SPR:$src1, SPR:$src2),
+ IIC_fpMOVDI, "vmov", "\t$Rt, $Rt2, $src1, $src2",
[/* For disassembly only; pattern left blank */]> {
+ bits<5> src1;
+ bits<4> Rt;
+ bits<4> Rt2;
+
+ // Encode instruction operands.
+ let Inst{3-0} = src1{3-0};
+ let Inst{5} = src1{4};
+ let Inst{15-12} = Rt;
+ let Inst{19-16} = Rt2;
+
let Inst{7-6} = 0b00;
// Some single precision VFP instructions may be executed on both NEON and VFP
// pipelines.
let D = VFPNeonDomain;
+ let DecoderMethod = "DecodeVMOVRRS";
}
} // neverHasSideEffects
@@ -552,11 +579,24 @@ def VMOVSRR : AVConv5I<0b11000100, 0b1010,
(outs SPR:$dst1, SPR:$dst2), (ins GPR:$src1, GPR:$src2),
IIC_fpMOVID, "vmov", "\t$dst1, $dst2, $src1, $src2",
[/* For disassembly only; pattern left blank */]> {
+ // Instruction operands.
+ bits<5> dst1;
+ bits<4> src1;
+ bits<4> src2;
+
+ // Encode instruction operands.
+ let Inst{3-0} = dst1{3-0};
+ let Inst{5} = dst1{4};
+ let Inst{15-12} = src1;
+ let Inst{19-16} = src2;
+
let Inst{7-6} = 0b00;
// Some single precision VFP instructions may be executed on both NEON and VFP
// pipelines.
let D = VFPNeonDomain;
+
+ let DecoderMethod = "DecodeVMOVSRR";
}
// FMRDH: SPR -> GPR
@@ -1084,45 +1124,42 @@ def FCONSTD : VFPAI<(outs DPR:$Dd), (ins vfp_f64imm:$imm),
VFPMiscFrm, IIC_fpUNA64,
"vmov", ".f64\t$Dd, $imm",
[(set DPR:$Dd, vfp_f64imm:$imm)]>, Requires<[HasVFP3]> {
- // Instruction operands.
- bits<5> Dd;
- bits<32> imm;
-
- // Encode instruction operands.
- let Inst{15-12} = Dd{3-0};
- let Inst{22} = Dd{4};
- let Inst{19} = imm{31};
- let Inst{18-16} = imm{22-20};
- let Inst{3-0} = imm{19-16};
+ bits<5> Dd;
+ bits<8> imm;
- // Encode remaining instruction bits.
let Inst{27-23} = 0b11101;
+ let Inst{22} = Dd{4};
let Inst{21-20} = 0b11;
+ let Inst{19-16} = imm{7-4};
+ let Inst{15-12} = Dd{3-0};
let Inst{11-9} = 0b101;
let Inst{8} = 1; // Double precision.
let Inst{7-4} = 0b0000;
+ let Inst{3-0} = imm{3-0};
}
def FCONSTS : VFPAI<(outs SPR:$Sd), (ins vfp_f32imm:$imm),
VFPMiscFrm, IIC_fpUNA32,
"vmov", ".f32\t$Sd, $imm",
[(set SPR:$Sd, vfp_f32imm:$imm)]>, Requires<[HasVFP3]> {
- // Instruction operands.
- bits<5> Sd;
- bits<32> imm;
-
- // Encode instruction operands.
- let Inst{15-12} = Sd{4-1};
- let Inst{22} = Sd{0};
- let Inst{19} = imm{31}; // The immediate is handled as a double.
- let Inst{18-16} = imm{22-20};
- let Inst{3-0} = imm{19-16};
+ bits<5> Sd;
+ bits<8> imm;
- // Encode remaining instruction bits.
let Inst{27-23} = 0b11101;
+ let Inst{22} = Sd{0};
let Inst{21-20} = 0b11;
+ let Inst{19-16} = imm{7-4};
+ let Inst{15-12} = Sd{4-1};
let Inst{11-9} = 0b101;
let Inst{8} = 0; // Single precision.
let Inst{7-4} = 0b0000;
+ let Inst{3-0} = imm{3-0};
}
}
+
+//===----------------------------------------------------------------------===//
+// Assembler aliases.
+//
+
+def : VFP2InstAlias<"fmstat${p}", (FMSTAT pred:$p)>;
+
diff --git a/contrib/llvm/lib/Target/ARM/ARMLoadStoreOptimizer.cpp b/contrib/llvm/lib/Target/ARM/ARMLoadStoreOptimizer.cpp
index c6efea1..faa8ba7 100644
--- a/contrib/llvm/lib/Target/ARM/ARMLoadStoreOptimizer.cpp
+++ b/contrib/llvm/lib/Target/ARM/ARMLoadStoreOptimizer.cpp
@@ -14,10 +14,10 @@
#define DEBUG_TYPE "arm-ldst-opt"
#include "ARM.h"
-#include "ARMAddressingModes.h"
#include "ARMBaseInstrInfo.h"
#include "ARMMachineFunctionInfo.h"
#include "ARMRegisterInfo.h"
+#include "MCTargetDesc/ARMAddressingModes.h"
#include "llvm/DerivedTypes.h"
#include "llvm/Function.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
@@ -26,6 +26,7 @@
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/RegisterScavenging.h"
+#include "llvm/CodeGen/SelectionDAGNodes.h"
#include "llvm/Target/TargetData.h"
#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetMachine.h"
@@ -763,9 +764,9 @@ static unsigned getPreIndexedLoadStoreOpcode(unsigned Opc,
ARM_AM::AddrOpc Mode) {
switch (Opc) {
case ARM::LDRi12:
- return ARM::LDR_PRE;
+ return ARM::LDR_PRE_IMM;
case ARM::STRi12:
- return ARM::STR_PRE;
+ return ARM::STR_PRE_IMM;
case ARM::VLDRS:
return Mode == ARM_AM::add ? ARM::VLDMSIA_UPD : ARM::VLDMSDB_UPD;
case ARM::VLDRD:
@@ -789,9 +790,9 @@ static unsigned getPostIndexedLoadStoreOpcode(unsigned Opc,
ARM_AM::AddrOpc Mode) {
switch (Opc) {
case ARM::LDRi12:
- return ARM::LDR_POST;
+ return ARM::LDR_POST_IMM;
case ARM::STRi12:
- return ARM::STR_POST;
+ return ARM::STR_POST_IMM;
case ARM::VLDRS:
return Mode == ARM_AM::add ? ARM::VLDMSIA_UPD : ARM::VLDMSDB_UPD;
case ARM::VLDRD:
@@ -892,12 +893,6 @@ bool ARMLoadStoreOpt::MergeBaseUpdateLoadStore(MachineBasicBlock &MBB,
if (!DoMerge)
return false;
- unsigned Offset = 0;
- if (isAM2)
- Offset = ARM_AM::getAM2Opc(AddSub, Bytes, ARM_AM::no_shift);
- else if (!isAM5)
- Offset = AddSub == ARM_AM::sub ? -Bytes : Bytes;
-
if (isAM5) {
// VLDM[SD}_UPD, VSTM[SD]_UPD
// (There are no base-updating versions of VLDR/VSTR instructions, but the
@@ -911,28 +906,44 @@ bool ARMLoadStoreOpt::MergeBaseUpdateLoadStore(MachineBasicBlock &MBB,
.addReg(MO.getReg(), (isLd ? getDefRegState(true) :
getKillRegState(MO.isKill())));
} else if (isLd) {
- if (isAM2)
- // LDR_PRE, LDR_POST,
- BuildMI(MBB, MBBI, dl, TII->get(NewOpc), MI->getOperand(0).getReg())
- .addReg(Base, RegState::Define)
- .addReg(Base).addReg(0).addImm(Offset).addImm(Pred).addReg(PredReg);
- else
+ if (isAM2) {
+ // LDR_PRE, LDR_POST
+ if (NewOpc == ARM::LDR_PRE_IMM || NewOpc == ARM::LDRB_PRE_IMM) {
+ int Offset = AddSub == ARM_AM::sub ? -Bytes : Bytes;
+ BuildMI(MBB, MBBI, dl, TII->get(NewOpc), MI->getOperand(0).getReg())
+ .addReg(Base, RegState::Define)
+ .addReg(Base).addImm(Offset).addImm(Pred).addReg(PredReg);
+ } else {
+ int Offset = ARM_AM::getAM2Opc(AddSub, Bytes, ARM_AM::no_shift);
+ BuildMI(MBB, MBBI, dl, TII->get(NewOpc), MI->getOperand(0).getReg())
+ .addReg(Base, RegState::Define)
+ .addReg(Base).addReg(0).addImm(Offset).addImm(Pred).addReg(PredReg);
+ }
+ } else {
+ int Offset = AddSub == ARM_AM::sub ? -Bytes : Bytes;
// t2LDR_PRE, t2LDR_POST
BuildMI(MBB, MBBI, dl, TII->get(NewOpc), MI->getOperand(0).getReg())
.addReg(Base, RegState::Define)
.addReg(Base).addImm(Offset).addImm(Pred).addReg(PredReg);
+ }
} else {
MachineOperand &MO = MI->getOperand(0);
- if (isAM2)
+ // FIXME: post-indexed stores use am2offset_imm, which still encodes
+ // the vestigal zero-reg offset register. When that's fixed, this clause
+ // can be removed entirely.
+ if (isAM2 && NewOpc == ARM::STR_POST_IMM) {
+ int Offset = ARM_AM::getAM2Opc(AddSub, Bytes, ARM_AM::no_shift);
// STR_PRE, STR_POST
BuildMI(MBB, MBBI, dl, TII->get(NewOpc), Base)
.addReg(MO.getReg(), getKillRegState(MO.isKill()))
.addReg(Base).addReg(0).addImm(Offset).addImm(Pred).addReg(PredReg);
- else
+ } else {
+ int Offset = AddSub == ARM_AM::sub ? -Bytes : Bytes;
// t2STR_PRE, t2STR_POST
BuildMI(MBB, MBBI, dl, TII->get(NewOpc), Base)
.addReg(MO.getReg(), getKillRegState(MO.isKill()))
.addReg(Base).addImm(Offset).addImm(Pred).addReg(PredReg);
+ }
}
MBB.erase(MBBI);
diff --git a/contrib/llvm/lib/Target/ARM/ARMMCInstLower.cpp b/contrib/llvm/lib/Target/ARM/ARMMCInstLower.cpp
index 7411b59..daa126d 100644
--- a/contrib/llvm/lib/Target/ARM/ARMMCInstLower.cpp
+++ b/contrib/llvm/lib/Target/ARM/ARMMCInstLower.cpp
@@ -14,7 +14,7 @@
#include "ARM.h"
#include "ARMAsmPrinter.h"
-#include "ARMMCExpr.h"
+#include "MCTargetDesc/ARMMCExpr.h"
#include "llvm/Constants.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/MC/MCExpr.h"
diff --git a/contrib/llvm/lib/Target/ARM/ARMRegisterInfo.td b/contrib/llvm/lib/Target/ARM/ARMRegisterInfo.td
index 76eb496..036822d 100644
--- a/contrib/llvm/lib/Target/ARM/ARMRegisterInfo.td
+++ b/contrib/llvm/lib/Target/ARM/ARMRegisterInfo.td
@@ -182,8 +182,10 @@ def QQQQ3 : ARMReg<3, "qqqq3", [QQ6, QQ7]>;
// Current Program Status Register.
def CPSR : ARMReg<0, "cpsr">;
-def FPSCR : ARMReg<1, "fpscr">;
-def ITSTATE : ARMReg<2, "itstate">;
+def APSR : ARMReg<1, "apsr">;
+def SPSR : ARMReg<2, "spsr">;
+def FPSCR : ARMReg<3, "fpscr">;
+def ITSTATE : ARMReg<4, "itstate">;
// Special Registers - only available in privileged mode.
def FPSID : ARMReg<0, "fpsid">;
@@ -213,6 +215,23 @@ def GPR : RegisterClass<"ARM", [i32], 32, (add (sequence "R%u", 0, 12),
}];
}
+// GPRs without the PC. Some ARM instructions do not allow the PC in
+// certain operand slots, particularly as the destination. Primarily
+// useful for disassembly.
+def GPRnopc : RegisterClass<"ARM", [i32], 32, (sub GPR, PC)> {
+ let AltOrders = [(add LR, GPRnopc), (trunc GPRnopc, 8)];
+ let AltOrderSelect = [{
+ return 1 + MF.getTarget().getSubtarget<ARMSubtarget>().isThumb1Only();
+ }];
+}
+
+// GPRsp - Only the SP is legal. Used by Thumb1 instructions that want the
+// implied SP argument list.
+// FIXME: It would be better to not use this at all and refactor the
+// instructions to not have SP an an explicit argument. That makes
+// frame index resolution a bit trickier, though.
+def GPRsp : RegisterClass<"ARM", [i32], 32, (add SP)>;
+
// restricted GPR register class. Many Thumb2 instructions allow the full
// register range for operands, but have undefined behaviours when PC
// or SP (R13 or R15) are used. The ARM ISA refers to these operands
@@ -328,5 +347,6 @@ def QQQQPR : RegisterClass<"ARM", [v8i64], 256, (sequence "QQQQ%u", 0, 3)> {
// Condition code registers.
def CCR : RegisterClass<"ARM", [i32], 32, (add CPSR)> {
+ let CopyCost = -1; // Don't allow copying of status registers.
let isAllocatable = 0;
}
diff --git a/contrib/llvm/lib/Target/ARM/ARMSelectionDAGInfo.cpp b/contrib/llvm/lib/Target/ARM/ARMSelectionDAGInfo.cpp
index ef0aaf2..a3a3d58 100644
--- a/contrib/llvm/lib/Target/ARM/ARMSelectionDAGInfo.cpp
+++ b/contrib/llvm/lib/Target/ARM/ARMSelectionDAGInfo.cpp
@@ -138,13 +138,12 @@ ARMSelectionDAGInfo::EmitTargetCodeForMemcpy(SelectionDAG &DAG, DebugLoc dl,
// Adjust parameters for memset, EABI uses format (ptr, size, value),
// GNU library uses (ptr, value, size)
// See RTABI section 4.3.4
-SDValue
-ARMSelectionDAGInfo::EmitTargetCodeForMemset(SelectionDAG &DAG, DebugLoc dl,
- SDValue Chain, SDValue Dst,
- SDValue Src, SDValue Size,
- unsigned Align, bool isVolatile,
- MachinePointerInfo DstPtrInfo) const
-{
+SDValue ARMSelectionDAGInfo::
+EmitTargetCodeForMemset(SelectionDAG &DAG, DebugLoc dl,
+ SDValue Chain, SDValue Dst,
+ SDValue Src, SDValue Size,
+ unsigned Align, bool isVolatile,
+ MachinePointerInfo DstPtrInfo) const {
// Use default for non AAPCS subtargets
if (!Subtarget->isAAPCS_ABI())
return SDValue();
@@ -155,7 +154,7 @@ ARMSelectionDAGInfo::EmitTargetCodeForMemset(SelectionDAG &DAG, DebugLoc dl,
TargetLowering::ArgListEntry Entry;
// First argument: data pointer
- const Type *IntPtrTy = TLI.getTargetData()->getIntPtrType(*DAG.getContext());
+ Type *IntPtrTy = TLI.getTargetData()->getIntPtrType(*DAG.getContext());
Entry.Node = Dst;
Entry.Ty = IntPtrTy;
Args.push_back(Entry);
diff --git a/contrib/llvm/lib/Target/ARM/ARMSelectionDAGInfo.h b/contrib/llvm/lib/Target/ARM/ARMSelectionDAGInfo.h
index ec1bf5c..6419a73 100644
--- a/contrib/llvm/lib/Target/ARM/ARMSelectionDAGInfo.h
+++ b/contrib/llvm/lib/Target/ARM/ARMSelectionDAGInfo.h
@@ -14,10 +14,27 @@
#ifndef ARMSELECTIONDAGINFO_H
#define ARMSELECTIONDAGINFO_H
+#include "MCTargetDesc/ARMAddressingModes.h"
#include "llvm/Target/TargetSelectionDAGInfo.h"
namespace llvm {
+namespace ARM_AM {
+ static inline ShiftOpc getShiftOpcForNode(unsigned Opcode) {
+ switch (Opcode) {
+ default: return ARM_AM::no_shift;
+ case ISD::SHL: return ARM_AM::lsl;
+ case ISD::SRL: return ARM_AM::lsr;
+ case ISD::SRA: return ARM_AM::asr;
+ case ISD::ROTR: return ARM_AM::ror;
+ //case ISD::ROTL: // Only if imm -> turn into ROTR.
+ // Can't handle RRX here, because it would require folding a flag into
+ // the addressing mode. :( This causes us to miss certain things.
+ //case ARMISD::RRX: return ARM_AM::rrx;
+ }
+ }
+} // end namespace ARM_AM
+
class ARMSelectionDAGInfo : public TargetSelectionDAGInfo {
/// Subtarget - Keep a pointer to the ARMSubtarget around so that we can
/// make the right decision when generating code for different targets.
diff --git a/contrib/llvm/lib/Target/ARM/ARMSubtarget.cpp b/contrib/llvm/lib/Target/ARM/ARMSubtarget.cpp
index 1cab9e4..247d6be 100644
--- a/contrib/llvm/lib/Target/ARM/ARMSubtarget.cpp
+++ b/contrib/llvm/lib/Target/ARM/ARMSubtarget.cpp
@@ -53,11 +53,14 @@ ARMSubtarget::ARMSubtarget(const std::string &TT, const std::string &CPU,
, HasVMLxForwarding(false)
, SlowFPBrcc(false)
, InThumbMode(false)
+ , InNaClMode(false)
, HasThumb2(false)
+ , IsMClass(false)
, NoARM(false)
, PostRAScheduler(false)
, IsR9Reserved(ReserveR9)
, UseMovt(false)
+ , SupportsTailCall(false)
, HasFP16(false)
, HasD16(false)
, HasHardwareDivide(false)
@@ -111,6 +114,8 @@ ARMSubtarget::ARMSubtarget(const std::string &TT, const std::string &CPU,
else {
IsR9Reserved = ReserveR9 | !HasV6Ops;
UseMovt = DarwinUseMOVT && hasV6T2Ops();
+ const Triple &T = getTargetTriple();
+ SupportsTailCall = T.getOS() == Triple::IOS && !T.isOSVersionLT(5, 0);
}
if (!isThumb() || hasThumb2())
diff --git a/contrib/llvm/lib/Target/ARM/ARMSubtarget.h b/contrib/llvm/lib/Target/ARM/ARMSubtarget.h
index c650872..b63e108 100644
--- a/contrib/llvm/lib/Target/ARM/ARMSubtarget.h
+++ b/contrib/llvm/lib/Target/ARM/ARMSubtarget.h
@@ -70,9 +70,16 @@ protected:
/// InThumbMode - True if compiling for Thumb, false for ARM.
bool InThumbMode;
+ /// InNaClMode - True if targeting Native Client
+ bool InNaClMode;
+
/// HasThumb2 - True if Thumb2 instructions are supported.
bool HasThumb2;
+ /// IsMClass - True if the subtarget belongs to the 'M' profile of CPUs -
+ /// v6m, v7m for example.
+ bool IsMClass;
+
/// NoARM - True if subtarget does not support ARM mode execution.
bool NoARM;
@@ -86,6 +93,11 @@ protected:
/// imms (including global addresses).
bool UseMovt;
+ /// SupportsTailCall - True if the OS supports tail call. The dynamic linker
+ /// must be able to synthesize call stubs for interworking between ARM and
+ /// Thumb.
+ bool SupportsTailCall;
+
/// HasFP16 - True if subtarget supports half-precision FP (We support VFP+HF
/// only so far)
bool HasFP16;
@@ -209,6 +221,9 @@ protected:
const Triple &getTargetTriple() const { return TargetTriple; }
bool isTargetDarwin() const { return TargetTriple.isOSDarwin(); }
+ bool isTargetNaCl() const {
+ return TargetTriple.getOS() == Triple::NativeClient;
+ }
bool isTargetELF() const { return !isTargetDarwin(); }
bool isAPCS_ABI() const { return TargetABI == ARM_ABI_APCS; }
@@ -218,10 +233,13 @@ protected:
bool isThumb1Only() const { return InThumbMode && !HasThumb2; }
bool isThumb2() const { return InThumbMode && HasThumb2; }
bool hasThumb2() const { return HasThumb2; }
+ bool isMClass() const { return IsMClass; }
+ bool isARClass() const { return !IsMClass; }
bool isR9Reserved() const { return IsR9Reserved; }
bool useMovt() const { return UseMovt && hasV6T2Ops(); }
+ bool supportsTailCall() const { return SupportsTailCall; }
bool allowsUnalignedMem() const { return AllowsUnalignedMem; }
diff --git a/contrib/llvm/lib/Target/ARM/ARMTargetMachine.cpp b/contrib/llvm/lib/Target/ARM/ARMTargetMachine.cpp
index f0b176a..96b1e89 100644
--- a/contrib/llvm/lib/Target/ARM/ARMTargetMachine.cpp
+++ b/contrib/llvm/lib/Target/ARM/ARMTargetMachine.cpp
@@ -15,77 +15,50 @@
#include "ARM.h"
#include "llvm/PassManager.h"
#include "llvm/CodeGen/Passes.h"
+#include "llvm/MC/MCAsmInfo.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/FormattedStream.h"
+#include "llvm/Support/TargetRegistry.h"
#include "llvm/Target/TargetOptions.h"
-#include "llvm/Target/TargetRegistry.h"
using namespace llvm;
-// This is duplicated code. Refactor this.
-static MCStreamer *createMCStreamer(const Target &T, const std::string &TT,
- MCContext &Ctx, TargetAsmBackend &TAB,
- raw_ostream &OS,
- MCCodeEmitter *Emitter,
- bool RelaxAll,
- bool NoExecStack) {
- Triple TheTriple(TT);
-
- if (TheTriple.isOSDarwin())
- return createMachOStreamer(Ctx, TAB, OS, Emitter, RelaxAll);
-
- if (TheTriple.isOSWindows()) {
- llvm_unreachable("ARM does not support Windows COFF format");
- return NULL;
- }
-
- return createELFStreamer(Ctx, TAB, OS, Emitter, RelaxAll, NoExecStack);
-}
+static cl::opt<bool>
+EnableGlobalMerge("global-merge", cl::Hidden,
+ cl::desc("Enable global merge pass"),
+ cl::init(true));
extern "C" void LLVMInitializeARMTarget() {
// Register the target.
RegisterTargetMachine<ARMTargetMachine> X(TheARMTarget);
RegisterTargetMachine<ThumbTargetMachine> Y(TheThumbTarget);
-
- // Register the MC Code Emitter
- TargetRegistry::RegisterCodeEmitter(TheARMTarget, createARMMCCodeEmitter);
- TargetRegistry::RegisterCodeEmitter(TheThumbTarget, createARMMCCodeEmitter);
-
- // Register the asm backend.
- TargetRegistry::RegisterAsmBackend(TheARMTarget, createARMAsmBackend);
- TargetRegistry::RegisterAsmBackend(TheThumbTarget, createARMAsmBackend);
-
- // Register the object streamer.
- TargetRegistry::RegisterObjectStreamer(TheARMTarget, createMCStreamer);
- TargetRegistry::RegisterObjectStreamer(TheThumbTarget, createMCStreamer);
-
}
/// TargetMachine ctor - Create an ARM architecture model.
///
-ARMBaseTargetMachine::ARMBaseTargetMachine(const Target &T,
- const std::string &TT,
- const std::string &CPU,
- const std::string &FS)
- : LLVMTargetMachine(T, TT, CPU, FS),
+ARMBaseTargetMachine::ARMBaseTargetMachine(const Target &T, StringRef TT,
+ StringRef CPU, StringRef FS,
+ Reloc::Model RM, CodeModel::Model CM)
+ : LLVMTargetMachine(T, TT, CPU, FS, RM, CM),
Subtarget(TT, CPU, FS),
JITInfo(),
InstrItins(Subtarget.getInstrItineraryData()) {
- DefRelocModel = getRelocationModel();
-
// Default to soft float ABI
if (FloatABIType == FloatABI::Default)
FloatABIType = FloatABI::Soft;
}
-ARMTargetMachine::ARMTargetMachine(const Target &T, const std::string &TT,
- const std::string &CPU,
- const std::string &FS)
- : ARMBaseTargetMachine(T, TT, CPU, FS), InstrInfo(Subtarget),
+ARMTargetMachine::ARMTargetMachine(const Target &T, StringRef TT,
+ StringRef CPU, StringRef FS,
+ Reloc::Model RM, CodeModel::Model CM)
+ : ARMBaseTargetMachine(T, TT, CPU, FS, RM, CM), InstrInfo(Subtarget),
DataLayout(Subtarget.isAPCS_ABI() ?
std::string("e-p:32:32-f64:32:64-i64:32:64-"
- "v128:32:128-v64:32:64-n32") :
+ "v128:32:128-v64:32:64-n32-S32") :
+ Subtarget.isAAPCS_ABI() ?
+ std::string("e-p:32:32-f64:64:64-i64:64:64-"
+ "v128:64:128-v64:64:64-n32-S64") :
std::string("e-p:32:32-f64:64:64-i64:64:64-"
- "v128:64:128-v64:64:64-n32")),
+ "v128:64:128-v64:64:64-n32-S32")),
ELFWriterInfo(*this),
TLInfo(*this),
TSInfo(*this),
@@ -95,20 +68,24 @@ ARMTargetMachine::ARMTargetMachine(const Target &T, const std::string &TT,
"support ARM mode execution!");
}
-ThumbTargetMachine::ThumbTargetMachine(const Target &T, const std::string &TT,
- const std::string &CPU,
- const std::string &FS)
- : ARMBaseTargetMachine(T, TT, CPU, FS),
+ThumbTargetMachine::ThumbTargetMachine(const Target &T, StringRef TT,
+ StringRef CPU, StringRef FS,
+ Reloc::Model RM, CodeModel::Model CM)
+ : ARMBaseTargetMachine(T, TT, CPU, FS, RM, CM),
InstrInfo(Subtarget.hasThumb2()
? ((ARMBaseInstrInfo*)new Thumb2InstrInfo(Subtarget))
: ((ARMBaseInstrInfo*)new Thumb1InstrInfo(Subtarget))),
DataLayout(Subtarget.isAPCS_ABI() ?
std::string("e-p:32:32-f64:32:64-i64:32:64-"
"i16:16:32-i8:8:32-i1:8:32-"
- "v128:32:128-v64:32:64-a:0:32-n32") :
+ "v128:32:128-v64:32:64-a:0:32-n32-S32") :
+ Subtarget.isAAPCS_ABI() ?
+ std::string("e-p:32:32-f64:64:64-i64:64:64-"
+ "i16:16:32-i8:8:32-i1:8:32-"
+ "v128:64:128-v64:64:64-a:0:32-n32-S64") :
std::string("e-p:32:32-f64:64:64-i64:64:64-"
"i16:16:32-i8:8:32-i1:8:32-"
- "v128:64:128-v64:64:64-a:0:32-n32")),
+ "v128:64:128-v64:64:64-a:0:32-n32-S32")),
ELFWriterInfo(*this),
TLInfo(*this),
TSInfo(*this),
@@ -117,10 +94,9 @@ ThumbTargetMachine::ThumbTargetMachine(const Target &T, const std::string &TT,
: (ARMFrameLowering*)new Thumb1FrameLowering(Subtarget)) {
}
-// Pass Pipeline Configuration
bool ARMBaseTargetMachine::addPreISel(PassManagerBase &PM,
CodeGenOpt::Level OptLevel) {
- if (OptLevel != CodeGenOpt::None)
+ if (OptLevel != CodeGenOpt::None && EnableGlobalMerge)
PM.add(createARMGlobalMergePass(getTargetLowering()));
return false;
@@ -139,7 +115,6 @@ bool ARMBaseTargetMachine::addPreRegAlloc(PassManagerBase &PM,
PM.add(createARMLoadStoreOptimizationPass(true));
if (OptLevel != CodeGenOpt::None && Subtarget.isCortexA9())
PM.add(createMLxExpansionPass());
-
return true;
}
@@ -150,7 +125,7 @@ bool ARMBaseTargetMachine::addPreSched2(PassManagerBase &PM,
if (!Subtarget.isThumb1Only())
PM.add(createARMLoadStoreOptimizationPass());
if (Subtarget.hasNEON())
- PM.add(createNEONMoveFixPass());
+ PM.add(createExecutionDependencyFixPass(&ARM::DPRRegClass));
}
// Expand some pseudo instructions into multiple instructions to allow
@@ -179,10 +154,6 @@ bool ARMBaseTargetMachine::addPreEmitPass(PassManagerBase &PM,
bool ARMBaseTargetMachine::addCodeEmitter(PassManagerBase &PM,
CodeGenOpt::Level OptLevel,
JITCodeEmitter &JCE) {
- // FIXME: Move this to TargetJITInfo!
- if (DefRelocModel == Reloc::Default)
- setRelocationModel(Reloc::Static);
-
// Machine code emitter pass for ARM.
PM.add(createARMJITCodeEmitterPass(*this, JCE));
return false;
diff --git a/contrib/llvm/lib/Target/ARM/ARMTargetMachine.h b/contrib/llvm/lib/Target/ARM/ARMTargetMachine.h
index bc3d46a..c8c601c 100644
--- a/contrib/llvm/lib/Target/ARM/ARMTargetMachine.h
+++ b/contrib/llvm/lib/Target/ARM/ARMTargetMachine.h
@@ -37,11 +37,11 @@ protected:
private:
ARMJITInfo JITInfo;
InstrItineraryData InstrItins;
- Reloc::Model DefRelocModel; // Reloc model before it's overridden.
public:
- ARMBaseTargetMachine(const Target &T, const std::string &TT,
- const std::string &CPU, const std::string &FS);
+ ARMBaseTargetMachine(const Target &T, StringRef TT,
+ StringRef CPU, StringRef FS,
+ Reloc::Model RM, CodeModel::Model CM);
virtual ARMJITInfo *getJITInfo() { return &JITInfo; }
virtual const ARMSubtarget *getSubtargetImpl() const { return &Subtarget; }
@@ -69,8 +69,9 @@ class ARMTargetMachine : public ARMBaseTargetMachine {
ARMSelectionDAGInfo TSInfo;
ARMFrameLowering FrameLowering;
public:
- ARMTargetMachine(const Target &T, const std::string &TT,
- const std::string &CPU, const std::string &FS);
+ ARMTargetMachine(const Target &T, StringRef TT,
+ StringRef CPU, StringRef FS,
+ Reloc::Model RM, CodeModel::Model CM);
virtual const ARMRegisterInfo *getRegisterInfo() const {
return &InstrInfo.getRegisterInfo();
@@ -108,8 +109,9 @@ class ThumbTargetMachine : public ARMBaseTargetMachine {
// Either Thumb1FrameLowering or ARMFrameLowering.
OwningPtr<ARMFrameLowering> FrameLowering;
public:
- ThumbTargetMachine(const Target &T, const std::string &TT,
- const std::string &CPU, const std::string &FS);
+ ThumbTargetMachine(const Target &T, StringRef TT,
+ StringRef CPU, StringRef FS,
+ Reloc::Model RM, CodeModel::Model CM);
/// returns either Thumb1RegisterInfo or Thumb2RegisterInfo
virtual const ARMBaseRegisterInfo *getRegisterInfo() const {
diff --git a/contrib/llvm/lib/Target/ARM/AsmParser/ARMAsmLexer.cpp b/contrib/llvm/lib/Target/ARM/AsmParser/ARMAsmLexer.cpp
index d9a5fa2..14d35ba 100644
--- a/contrib/llvm/lib/Target/ARM/AsmParser/ARMAsmLexer.cpp
+++ b/contrib/llvm/lib/Target/ARM/AsmParser/ARMAsmLexer.cpp
@@ -7,16 +7,15 @@
//
//===----------------------------------------------------------------------===//
-#include "ARM.h"
-#include "ARMTargetMachine.h"
+#include "MCTargetDesc/ARMBaseInfo.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCParser/MCAsmLexer.h"
#include "llvm/MC/MCParser/MCParsedAsmOperand.h"
+#include "llvm/MC/MCRegisterInfo.h"
+#include "llvm/MC/MCTargetAsmLexer.h"
-#include "llvm/Target/TargetAsmLexer.h"
-#include "llvm/Target/TargetMachine.h"
-#include "llvm/Target/TargetRegistry.h"
+#include "llvm/Support/TargetRegistry.h"
#include "llvm/ADT/OwningPtr.h"
#include "llvm/ADT/SmallVector.h"
@@ -30,7 +29,7 @@ using namespace llvm;
namespace {
-class ARMBaseAsmLexer : public TargetAsmLexer {
+class ARMBaseAsmLexer : public MCTargetAsmLexer {
const MCAsmInfo &AsmInfo;
const AsmToken &lexDefinite() {
@@ -43,7 +42,7 @@ protected:
rmap_ty RegisterMap;
- void InitRegisterMap(const TargetRegisterInfo *info) {
+ void InitRegisterMap(const MCRegisterInfo *info) {
unsigned numRegs = info->getNumRegs();
for (unsigned i = 0; i < numRegs; ++i) {
@@ -77,33 +76,23 @@ protected:
}
public:
ARMBaseAsmLexer(const Target &T, const MCAsmInfo &MAI)
- : TargetAsmLexer(T), AsmInfo(MAI) {
+ : MCTargetAsmLexer(T), AsmInfo(MAI) {
}
};
class ARMAsmLexer : public ARMBaseAsmLexer {
public:
- ARMAsmLexer(const Target &T, const MCAsmInfo &MAI)
+ ARMAsmLexer(const Target &T, const MCRegisterInfo &MRI, const MCAsmInfo &MAI)
: ARMBaseAsmLexer(T, MAI) {
- std::string tripleString("arm-unknown-unknown");
- std::string featureString;
- std::string CPU;
- OwningPtr<const TargetMachine>
- targetMachine(T.createTargetMachine(tripleString, CPU, featureString));
- InitRegisterMap(targetMachine->getRegisterInfo());
+ InitRegisterMap(&MRI);
}
};
class ThumbAsmLexer : public ARMBaseAsmLexer {
public:
- ThumbAsmLexer(const Target &T, const MCAsmInfo &MAI)
+ ThumbAsmLexer(const Target &T, const MCRegisterInfo &MRI,const MCAsmInfo &MAI)
: ARMBaseAsmLexer(T, MAI) {
- std::string tripleString("thumb-unknown-unknown");
- std::string featureString;
- std::string CPU;
- OwningPtr<const TargetMachine>
- targetMachine(T.createTargetMachine(tripleString, CPU, featureString));
- InitRegisterMap(targetMachine->getRegisterInfo());
+ InitRegisterMap(&MRI);
}
};
@@ -149,6 +138,6 @@ AsmToken ARMBaseAsmLexer::LexTokenUAL() {
}
extern "C" void LLVMInitializeARMAsmLexer() {
- RegisterAsmLexer<ARMAsmLexer> X(TheARMTarget);
- RegisterAsmLexer<ThumbAsmLexer> Y(TheThumbTarget);
+ RegisterMCAsmLexer<ARMAsmLexer> X(TheARMTarget);
+ RegisterMCAsmLexer<ThumbAsmLexer> Y(TheThumbTarget);
}
diff --git a/contrib/llvm/lib/Target/ARM/AsmParser/ARMAsmParser.cpp b/contrib/llvm/lib/Target/ARM/AsmParser/ARMAsmParser.cpp
index a474127..24f15b4 100644
--- a/contrib/llvm/lib/Target/ARM/AsmParser/ARMAsmParser.cpp
+++ b/contrib/llvm/lib/Target/ARM/AsmParser/ARMAsmParser.cpp
@@ -7,11 +7,9 @@
//
//===----------------------------------------------------------------------===//
-#include "ARM.h"
-#include "ARMAddressingModes.h"
-#include "ARMMCExpr.h"
-#include "ARMBaseRegisterInfo.h"
-#include "ARMSubtarget.h"
+#include "MCTargetDesc/ARMBaseInfo.h"
+#include "MCTargetDesc/ARMAddressingModes.h"
+#include "MCTargetDesc/ARMMCExpr.h"
#include "llvm/MC/MCParser/MCAsmLexer.h"
#include "llvm/MC/MCParser/MCAsmParser.h"
#include "llvm/MC/MCParser/MCParsedAsmOperand.h"
@@ -20,12 +18,17 @@
#include "llvm/MC/MCStreamer.h"
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCInst.h"
+#include "llvm/MC/MCInstrDesc.h"
+#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/MC/MCSubtargetInfo.h"
-#include "llvm/Target/TargetRegistry.h"
-#include "llvm/Target/TargetAsmParser.h"
+#include "llvm/MC/MCTargetAsmParser.h"
+#include "llvm/Support/MathExtras.h"
#include "llvm/Support/SourceMgr.h"
+#include "llvm/Support/TargetRegistry.h"
#include "llvm/Support/raw_ostream.h"
+#include "llvm/ADT/BitVector.h"
#include "llvm/ADT/OwningPtr.h"
+#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/ADT/StringSwitch.h"
@@ -37,49 +40,65 @@ namespace {
class ARMOperand;
-class ARMAsmParser : public TargetAsmParser {
+class ARMAsmParser : public MCTargetAsmParser {
MCSubtargetInfo &STI;
MCAsmParser &Parser;
+ struct {
+ ARMCC::CondCodes Cond; // Condition for IT block.
+ unsigned Mask:4; // Condition mask for instructions.
+ // Starting at first 1 (from lsb).
+ // '1' condition as indicated in IT.
+ // '0' inverse of condition (else).
+ // Count of instructions in IT block is
+ // 4 - trailingzeroes(mask)
+
+ bool FirstCond; // Explicit flag for when we're parsing the
+ // First instruction in the IT block. It's
+ // implied in the mask, so needs special
+ // handling.
+
+ unsigned CurPosition; // Current position in parsing of IT
+ // block. In range [0,3]. Initialized
+ // according to count of instructions in block.
+ // ~0U if no active IT block.
+ } ITState;
+ bool inITBlock() { return ITState.CurPosition != ~0U;}
+ void forwardITPosition() {
+ if (!inITBlock()) return;
+ // Move to the next instruction in the IT block, if there is one. If not,
+ // mark the block as done.
+ unsigned TZ = CountTrailingZeros_32(ITState.Mask);
+ if (++ITState.CurPosition == 5 - TZ)
+ ITState.CurPosition = ~0U; // Done with the IT block after this.
+ }
+
+
MCAsmParser &getParser() const { return Parser; }
MCAsmLexer &getLexer() const { return Parser.getLexer(); }
void Warning(SMLoc L, const Twine &Msg) { Parser.Warning(L, Msg); }
bool Error(SMLoc L, const Twine &Msg) { return Parser.Error(L, Msg); }
- int TryParseRegister();
- virtual bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc);
- bool TryParseRegisterWithWriteBack(SmallVectorImpl<MCParsedAsmOperand*> &);
- int TryParseShiftRegister(SmallVectorImpl<MCParsedAsmOperand*> &);
- bool ParseRegisterList(SmallVectorImpl<MCParsedAsmOperand*> &);
- bool ParseMemory(SmallVectorImpl<MCParsedAsmOperand*> &,
- ARMII::AddrMode AddrMode);
- bool ParseOperand(SmallVectorImpl<MCParsedAsmOperand*> &, StringRef Mnemonic);
- bool ParsePrefix(ARMMCExpr::VariantKind &RefKind);
- const MCExpr *ApplyPrefixToExpr(const MCExpr *E,
- MCSymbolRefExpr::VariantKind Variant);
-
-
- bool ParseMemoryOffsetReg(bool &Negative,
- bool &OffsetRegShifted,
- enum ARM_AM::ShiftOpc &ShiftType,
- const MCExpr *&ShiftAmount,
- const MCExpr *&Offset,
- bool &OffsetIsReg,
- int &OffsetRegNum,
- SMLoc &E);
- bool ParseShift(enum ARM_AM::ShiftOpc &St,
- const MCExpr *&ShiftAmount, SMLoc &E);
- bool ParseDirectiveWord(unsigned Size, SMLoc L);
- bool ParseDirectiveThumb(SMLoc L);
- bool ParseDirectiveThumbFunc(SMLoc L);
- bool ParseDirectiveCode(SMLoc L);
- bool ParseDirectiveSyntax(SMLoc L);
-
- bool MatchAndEmitInstruction(SMLoc IDLoc,
- SmallVectorImpl<MCParsedAsmOperand*> &Operands,
- MCStreamer &Out);
- void GetMnemonicAcceptInfo(StringRef Mnemonic, bool &CanAcceptCarrySet,
+ int tryParseRegister();
+ bool tryParseRegisterWithWriteBack(SmallVectorImpl<MCParsedAsmOperand*> &);
+ int tryParseShiftRegister(SmallVectorImpl<MCParsedAsmOperand*> &);
+ bool parseRegisterList(SmallVectorImpl<MCParsedAsmOperand*> &);
+ bool parseMemory(SmallVectorImpl<MCParsedAsmOperand*> &);
+ bool parseOperand(SmallVectorImpl<MCParsedAsmOperand*> &, StringRef Mnemonic);
+ bool parsePrefix(ARMMCExpr::VariantKind &RefKind);
+ bool parseMemRegOffsetShift(ARM_AM::ShiftOpc &ShiftType,
+ unsigned &ShiftAmount);
+ bool parseDirectiveWord(unsigned Size, SMLoc L);
+ bool parseDirectiveThumb(SMLoc L);
+ bool parseDirectiveThumbFunc(SMLoc L);
+ bool parseDirectiveCode(SMLoc L);
+ bool parseDirectiveSyntax(SMLoc L);
+
+ StringRef splitMnemonic(StringRef Mnemonic, unsigned &PredicationCode,
+ bool &CarrySetting, unsigned &ProcessorIMod,
+ StringRef &ITMask);
+ void getMnemonicAcceptInfo(StringRef Mnemonic, bool &CanAcceptCarrySet,
bool &CanAcceptPredicationCode);
bool isThumb() const {
@@ -89,10 +108,22 @@ class ARMAsmParser : public TargetAsmParser {
bool isThumbOne() const {
return isThumb() && (STI.getFeatureBits() & ARM::FeatureThumb2) == 0;
}
+ bool isThumbTwo() const {
+ return isThumb() && (STI.getFeatureBits() & ARM::FeatureThumb2);
+ }
+ bool hasV6Ops() const {
+ return STI.getFeatureBits() & ARM::HasV6Ops;
+ }
+ bool hasV7Ops() const {
+ return STI.getFeatureBits() & ARM::HasV7Ops;
+ }
void SwitchMode() {
unsigned FB = ComputeAvailableFeatures(STI.ToggleFeature(ARM::ModeThumb));
setAvailableFeatures(FB);
}
+ bool isMClass() const {
+ return STI.getFeatureBits() & ARM::FeatureMClass;
+ }
/// @name Auto-generated Match Functions
/// {
@@ -102,43 +133,108 @@ class ARMAsmParser : public TargetAsmParser {
/// }
- OperandMatchResultTy tryParseCoprocNumOperand(
- SmallVectorImpl<MCParsedAsmOperand*>&);
- OperandMatchResultTy tryParseCoprocRegOperand(
+ OperandMatchResultTy parseITCondCode(SmallVectorImpl<MCParsedAsmOperand*>&);
+ OperandMatchResultTy parseCoprocNumOperand(
SmallVectorImpl<MCParsedAsmOperand*>&);
- OperandMatchResultTy tryParseMemBarrierOptOperand(
+ OperandMatchResultTy parseCoprocRegOperand(
SmallVectorImpl<MCParsedAsmOperand*>&);
- OperandMatchResultTy tryParseProcIFlagsOperand(
+ OperandMatchResultTy parseCoprocOptionOperand(
SmallVectorImpl<MCParsedAsmOperand*>&);
- OperandMatchResultTy tryParseMSRMaskOperand(
+ OperandMatchResultTy parseMemBarrierOptOperand(
SmallVectorImpl<MCParsedAsmOperand*>&);
- OperandMatchResultTy tryParseMemMode2Operand(
+ OperandMatchResultTy parseProcIFlagsOperand(
SmallVectorImpl<MCParsedAsmOperand*>&);
- OperandMatchResultTy tryParseMemMode3Operand(
+ OperandMatchResultTy parseMSRMaskOperand(
SmallVectorImpl<MCParsedAsmOperand*>&);
+ OperandMatchResultTy parsePKHImm(SmallVectorImpl<MCParsedAsmOperand*> &O,
+ StringRef Op, int Low, int High);
+ OperandMatchResultTy parsePKHLSLImm(SmallVectorImpl<MCParsedAsmOperand*> &O) {
+ return parsePKHImm(O, "lsl", 0, 31);
+ }
+ OperandMatchResultTy parsePKHASRImm(SmallVectorImpl<MCParsedAsmOperand*> &O) {
+ return parsePKHImm(O, "asr", 1, 32);
+ }
+ OperandMatchResultTy parseSetEndImm(SmallVectorImpl<MCParsedAsmOperand*>&);
+ OperandMatchResultTy parseShifterImm(SmallVectorImpl<MCParsedAsmOperand*>&);
+ OperandMatchResultTy parseRotImm(SmallVectorImpl<MCParsedAsmOperand*>&);
+ OperandMatchResultTy parseBitfield(SmallVectorImpl<MCParsedAsmOperand*>&);
+ OperandMatchResultTy parsePostIdxReg(SmallVectorImpl<MCParsedAsmOperand*>&);
+ OperandMatchResultTy parseAM3Offset(SmallVectorImpl<MCParsedAsmOperand*>&);
+ OperandMatchResultTy parseFPImm(SmallVectorImpl<MCParsedAsmOperand*>&);
// Asm Match Converter Methods
- bool CvtLdWriteBackRegAddrMode2(MCInst &Inst, unsigned Opcode,
+ bool cvtT2LdrdPre(MCInst &Inst, unsigned Opcode,
+ const SmallVectorImpl<MCParsedAsmOperand*> &);
+ bool cvtT2StrdPre(MCInst &Inst, unsigned Opcode,
+ const SmallVectorImpl<MCParsedAsmOperand*> &);
+ bool cvtLdWriteBackRegT2AddrModeImm8(MCInst &Inst, unsigned Opcode,
+ const SmallVectorImpl<MCParsedAsmOperand*> &);
+ bool cvtStWriteBackRegT2AddrModeImm8(MCInst &Inst, unsigned Opcode,
+ const SmallVectorImpl<MCParsedAsmOperand*> &);
+ bool cvtLdWriteBackRegAddrMode2(MCInst &Inst, unsigned Opcode,
const SmallVectorImpl<MCParsedAsmOperand*> &);
- bool CvtStWriteBackRegAddrMode2(MCInst &Inst, unsigned Opcode,
+ bool cvtLdWriteBackRegAddrModeImm12(MCInst &Inst, unsigned Opcode,
const SmallVectorImpl<MCParsedAsmOperand*> &);
- bool CvtLdWriteBackRegAddrMode3(MCInst &Inst, unsigned Opcode,
+ bool cvtStWriteBackRegAddrModeImm12(MCInst &Inst, unsigned Opcode,
const SmallVectorImpl<MCParsedAsmOperand*> &);
- bool CvtStWriteBackRegAddrMode3(MCInst &Inst, unsigned Opcode,
+ bool cvtStWriteBackRegAddrMode2(MCInst &Inst, unsigned Opcode,
const SmallVectorImpl<MCParsedAsmOperand*> &);
+ bool cvtStWriteBackRegAddrMode3(MCInst &Inst, unsigned Opcode,
+ const SmallVectorImpl<MCParsedAsmOperand*> &);
+ bool cvtLdExtTWriteBackImm(MCInst &Inst, unsigned Opcode,
+ const SmallVectorImpl<MCParsedAsmOperand*> &);
+ bool cvtLdExtTWriteBackReg(MCInst &Inst, unsigned Opcode,
+ const SmallVectorImpl<MCParsedAsmOperand*> &);
+ bool cvtStExtTWriteBackImm(MCInst &Inst, unsigned Opcode,
+ const SmallVectorImpl<MCParsedAsmOperand*> &);
+ bool cvtStExtTWriteBackReg(MCInst &Inst, unsigned Opcode,
+ const SmallVectorImpl<MCParsedAsmOperand*> &);
+ bool cvtLdrdPre(MCInst &Inst, unsigned Opcode,
+ const SmallVectorImpl<MCParsedAsmOperand*> &);
+ bool cvtStrdPre(MCInst &Inst, unsigned Opcode,
+ const SmallVectorImpl<MCParsedAsmOperand*> &);
+ bool cvtLdWriteBackRegAddrMode3(MCInst &Inst, unsigned Opcode,
+ const SmallVectorImpl<MCParsedAsmOperand*> &);
+ bool cvtThumbMultiply(MCInst &Inst, unsigned Opcode,
+ const SmallVectorImpl<MCParsedAsmOperand*> &);
+
+ bool validateInstruction(MCInst &Inst,
+ const SmallVectorImpl<MCParsedAsmOperand*> &Ops);
+ void processInstruction(MCInst &Inst,
+ const SmallVectorImpl<MCParsedAsmOperand*> &Ops);
+ bool shouldOmitCCOutOperand(StringRef Mnemonic,
+ SmallVectorImpl<MCParsedAsmOperand*> &Operands);
public:
+ enum ARMMatchResultTy {
+ Match_RequiresITBlock = FIRST_TARGET_MATCH_RESULT_TY,
+ Match_RequiresNotITBlock,
+ Match_RequiresV6,
+ Match_RequiresThumb2
+ };
+
ARMAsmParser(MCSubtargetInfo &_STI, MCAsmParser &_Parser)
- : TargetAsmParser(), STI(_STI), Parser(_Parser) {
+ : MCTargetAsmParser(), STI(_STI), Parser(_Parser) {
MCAsmParserExtension::Initialize(_Parser);
// Initialize the set of available features.
setAvailableFeatures(ComputeAvailableFeatures(STI.getFeatureBits()));
+
+ // Not in an ITBlock to start with.
+ ITState.CurPosition = ~0U;
}
- virtual bool ParseInstruction(StringRef Name, SMLoc NameLoc,
- SmallVectorImpl<MCParsedAsmOperand*> &Operands);
- virtual bool ParseDirective(AsmToken DirectiveID);
+ // Implementation of the MCTargetAsmParser interface:
+ bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc);
+ bool ParseInstruction(StringRef Name, SMLoc NameLoc,
+ SmallVectorImpl<MCParsedAsmOperand*> &Operands);
+ bool ParseDirective(AsmToken DirectiveID);
+
+ unsigned checkTargetMatchPredicate(MCInst &Inst);
+
+ bool MatchAndEmitInstruction(SMLoc IDLoc,
+ SmallVectorImpl<MCParsedAsmOperand*> &Operands,
+ MCStreamer &Out);
};
} // end anonymous namespace
@@ -148,22 +244,30 @@ namespace {
/// instruction.
class ARMOperand : public MCParsedAsmOperand {
enum KindTy {
- CondCode,
- CCOut,
- CoprocNum,
- CoprocReg,
- Immediate,
- MemBarrierOpt,
- Memory,
- MSRMask,
- ProcIFlags,
- Register,
- RegisterList,
- DPRRegisterList,
- SPRRegisterList,
- ShiftedRegister,
- Shifter,
- Token
+ k_CondCode,
+ k_CCOut,
+ k_ITCondMask,
+ k_CoprocNum,
+ k_CoprocReg,
+ k_CoprocOption,
+ k_Immediate,
+ k_FPImmediate,
+ k_MemBarrierOpt,
+ k_Memory,
+ k_PostIndexRegister,
+ k_MSRMask,
+ k_ProcIFlags,
+ k_VectorIndex,
+ k_Register,
+ k_RegisterList,
+ k_DPRRegisterList,
+ k_SPRRegisterList,
+ k_ShiftedRegister,
+ k_ShiftedImmediate,
+ k_ShifterImmediate,
+ k_RotateImmediate,
+ k_BitfieldDescriptor,
+ k_Token
} Kind;
SMLoc StartLoc, EndLoc;
@@ -175,12 +279,20 @@ class ARMOperand : public MCParsedAsmOperand {
} CC;
struct {
- ARM_MB::MemBOpt Val;
- } MBOpt;
+ unsigned Val;
+ } Cop;
struct {
unsigned Val;
- } Cop;
+ } CoprocOption;
+
+ struct {
+ unsigned Mask:4;
+ } ITMask;
+
+ struct {
+ ARM_MB::MemBOpt Val;
+ } MBOpt;
struct {
ARM_PROC::IFlags Val;
@@ -200,37 +312,60 @@ class ARMOperand : public MCParsedAsmOperand {
} Reg;
struct {
+ unsigned Val;
+ } VectorIndex;
+
+ struct {
const MCExpr *Val;
} Imm;
+ struct {
+ unsigned Val; // encoded 8-bit representation
+ } FPImm;
+
/// Combined record for all forms of ARM address expressions.
struct {
- ARMII::AddrMode AddrMode;
unsigned BaseRegNum;
- union {
- unsigned RegNum; ///< Offset register num, when OffsetIsReg.
- const MCExpr *Value; ///< Offset value, when !OffsetIsReg.
- } Offset;
- const MCExpr *ShiftAmount; // used when OffsetRegShifted is true
- enum ARM_AM::ShiftOpc ShiftType; // used when OffsetRegShifted is true
- unsigned OffsetRegShifted : 1; // only used when OffsetIsReg is true
- unsigned Preindexed : 1;
- unsigned Postindexed : 1;
- unsigned OffsetIsReg : 1;
- unsigned Negative : 1; // only used when OffsetIsReg is true
- unsigned Writeback : 1;
- } Mem;
+ // Offset is in OffsetReg or OffsetImm. If both are zero, no offset
+ // was specified.
+ const MCConstantExpr *OffsetImm; // Offset immediate value
+ unsigned OffsetRegNum; // Offset register num, when OffsetImm == NULL
+ ARM_AM::ShiftOpc ShiftType; // Shift type for OffsetReg
+ unsigned ShiftImm; // shift for OffsetReg.
+ unsigned Alignment; // 0 = no alignment specified
+ // n = alignment in bytes (8, 16, or 32)
+ unsigned isNegative : 1; // Negated OffsetReg? (~'U' bit)
+ } Memory;
struct {
+ unsigned RegNum;
+ bool isAdd;
ARM_AM::ShiftOpc ShiftTy;
+ unsigned ShiftImm;
+ } PostIdxReg;
+
+ struct {
+ bool isASR;
unsigned Imm;
- } Shift;
+ } ShifterImm;
struct {
ARM_AM::ShiftOpc ShiftTy;
unsigned SrcReg;
unsigned ShiftReg;
unsigned ShiftImm;
- } ShiftedReg;
+ } RegShiftedReg;
+ struct {
+ ARM_AM::ShiftOpc ShiftTy;
+ unsigned SrcReg;
+ unsigned ShiftImm;
+ } RegShiftedImm;
+ struct {
+ unsigned Imm;
+ } RotImm;
+ struct {
+ unsigned LSB;
+ unsigned Width;
+ } Bitfield;
};
ARMOperand(KindTy K) : MCParsedAsmOperand(), Kind(K) {}
@@ -240,45 +375,69 @@ public:
StartLoc = o.StartLoc;
EndLoc = o.EndLoc;
switch (Kind) {
- case CondCode:
+ case k_CondCode:
CC = o.CC;
break;
- case Token:
+ case k_ITCondMask:
+ ITMask = o.ITMask;
+ break;
+ case k_Token:
Tok = o.Tok;
break;
- case CCOut:
- case Register:
+ case k_CCOut:
+ case k_Register:
Reg = o.Reg;
break;
- case RegisterList:
- case DPRRegisterList:
- case SPRRegisterList:
+ case k_RegisterList:
+ case k_DPRRegisterList:
+ case k_SPRRegisterList:
Registers = o.Registers;
break;
- case CoprocNum:
- case CoprocReg:
+ case k_CoprocNum:
+ case k_CoprocReg:
Cop = o.Cop;
break;
- case Immediate:
+ case k_CoprocOption:
+ CoprocOption = o.CoprocOption;
+ break;
+ case k_Immediate:
Imm = o.Imm;
break;
- case MemBarrierOpt:
+ case k_FPImmediate:
+ FPImm = o.FPImm;
+ break;
+ case k_MemBarrierOpt:
MBOpt = o.MBOpt;
break;
- case Memory:
- Mem = o.Mem;
+ case k_Memory:
+ Memory = o.Memory;
+ break;
+ case k_PostIndexRegister:
+ PostIdxReg = o.PostIdxReg;
break;
- case MSRMask:
+ case k_MSRMask:
MMask = o.MMask;
break;
- case ProcIFlags:
+ case k_ProcIFlags:
IFlags = o.IFlags;
break;
- case Shifter:
- Shift = o.Shift;
+ case k_ShifterImmediate:
+ ShifterImm = o.ShifterImm;
+ break;
+ case k_ShiftedRegister:
+ RegShiftedReg = o.RegShiftedReg;
+ break;
+ case k_ShiftedImmediate:
+ RegShiftedImm = o.RegShiftedImm;
break;
- case ShiftedRegister:
- ShiftedReg = o.ShiftedReg;
+ case k_RotateImmediate:
+ RotImm = o.RotImm;
+ break;
+ case k_BitfieldDescriptor:
+ Bitfield = o.Bitfield;
+ break;
+ case k_VectorIndex:
+ VectorIndex = o.VectorIndex;
break;
}
}
@@ -289,94 +448,96 @@ public:
SMLoc getEndLoc() const { return EndLoc; }
ARMCC::CondCodes getCondCode() const {
- assert(Kind == CondCode && "Invalid access!");
+ assert(Kind == k_CondCode && "Invalid access!");
return CC.Val;
}
unsigned getCoproc() const {
- assert((Kind == CoprocNum || Kind == CoprocReg) && "Invalid access!");
+ assert((Kind == k_CoprocNum || Kind == k_CoprocReg) && "Invalid access!");
return Cop.Val;
}
StringRef getToken() const {
- assert(Kind == Token && "Invalid access!");
+ assert(Kind == k_Token && "Invalid access!");
return StringRef(Tok.Data, Tok.Length);
}
unsigned getReg() const {
- assert((Kind == Register || Kind == CCOut) && "Invalid access!");
+ assert((Kind == k_Register || Kind == k_CCOut) && "Invalid access!");
return Reg.RegNum;
}
const SmallVectorImpl<unsigned> &getRegList() const {
- assert((Kind == RegisterList || Kind == DPRRegisterList ||
- Kind == SPRRegisterList) && "Invalid access!");
+ assert((Kind == k_RegisterList || Kind == k_DPRRegisterList ||
+ Kind == k_SPRRegisterList) && "Invalid access!");
return Registers;
}
const MCExpr *getImm() const {
- assert(Kind == Immediate && "Invalid access!");
+ assert(Kind == k_Immediate && "Invalid access!");
return Imm.Val;
}
+ unsigned getFPImm() const {
+ assert(Kind == k_FPImmediate && "Invalid access!");
+ return FPImm.Val;
+ }
+
+ unsigned getVectorIndex() const {
+ assert(Kind == k_VectorIndex && "Invalid access!");
+ return VectorIndex.Val;
+ }
+
ARM_MB::MemBOpt getMemBarrierOpt() const {
- assert(Kind == MemBarrierOpt && "Invalid access!");
+ assert(Kind == k_MemBarrierOpt && "Invalid access!");
return MBOpt.Val;
}
ARM_PROC::IFlags getProcIFlags() const {
- assert(Kind == ProcIFlags && "Invalid access!");
+ assert(Kind == k_ProcIFlags && "Invalid access!");
return IFlags.Val;
}
unsigned getMSRMask() const {
- assert(Kind == MSRMask && "Invalid access!");
+ assert(Kind == k_MSRMask && "Invalid access!");
return MMask.Val;
}
- /// @name Memory Operand Accessors
- /// @{
- ARMII::AddrMode getMemAddrMode() const {
- return Mem.AddrMode;
- }
- unsigned getMemBaseRegNum() const {
- return Mem.BaseRegNum;
- }
- unsigned getMemOffsetRegNum() const {
- assert(Mem.OffsetIsReg && "Invalid access!");
- return Mem.Offset.RegNum;
- }
- const MCExpr *getMemOffset() const {
- assert(!Mem.OffsetIsReg && "Invalid access!");
- return Mem.Offset.Value;
- }
- unsigned getMemOffsetRegShifted() const {
- assert(Mem.OffsetIsReg && "Invalid access!");
- return Mem.OffsetRegShifted;
+ bool isCoprocNum() const { return Kind == k_CoprocNum; }
+ bool isCoprocReg() const { return Kind == k_CoprocReg; }
+ bool isCoprocOption() const { return Kind == k_CoprocOption; }
+ bool isCondCode() const { return Kind == k_CondCode; }
+ bool isCCOut() const { return Kind == k_CCOut; }
+ bool isITMask() const { return Kind == k_ITCondMask; }
+ bool isITCondCode() const { return Kind == k_CondCode; }
+ bool isImm() const { return Kind == k_Immediate; }
+ bool isFPImm() const { return Kind == k_FPImmediate; }
+ bool isImm8s4() const {
+ if (Kind != k_Immediate)
+ return false;
+ const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
+ if (!CE) return false;
+ int64_t Value = CE->getValue();
+ return ((Value & 3) == 0) && Value >= -1020 && Value <= 1020;
}
- const MCExpr *getMemShiftAmount() const {
- assert(Mem.OffsetIsReg && Mem.OffsetRegShifted && "Invalid access!");
- return Mem.ShiftAmount;
+ bool isImm0_1020s4() const {
+ if (Kind != k_Immediate)
+ return false;
+ const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
+ if (!CE) return false;
+ int64_t Value = CE->getValue();
+ return ((Value & 3) == 0) && Value >= 0 && Value <= 1020;
}
- enum ARM_AM::ShiftOpc getMemShiftType() const {
- assert(Mem.OffsetIsReg && Mem.OffsetRegShifted && "Invalid access!");
- return Mem.ShiftType;
+ bool isImm0_508s4() const {
+ if (Kind != k_Immediate)
+ return false;
+ const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
+ if (!CE) return false;
+ int64_t Value = CE->getValue();
+ return ((Value & 3) == 0) && Value >= 0 && Value <= 508;
}
- bool getMemPreindexed() const { return Mem.Preindexed; }
- bool getMemPostindexed() const { return Mem.Postindexed; }
- bool getMemOffsetIsReg() const { return Mem.OffsetIsReg; }
- bool getMemNegative() const { return Mem.Negative; }
- bool getMemWriteback() const { return Mem.Writeback; }
-
- /// @}
-
- bool isCoprocNum() const { return Kind == CoprocNum; }
- bool isCoprocReg() const { return Kind == CoprocReg; }
- bool isCondCode() const { return Kind == CondCode; }
- bool isCCOut() const { return Kind == CCOut; }
- bool isImm() const { return Kind == Immediate; }
bool isImm0_255() const {
- if (Kind != Immediate)
+ if (Kind != k_Immediate)
return false;
const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
if (!CE) return false;
@@ -384,7 +545,7 @@ public:
return Value >= 0 && Value < 256;
}
bool isImm0_7() const {
- if (Kind != Immediate)
+ if (Kind != k_Immediate)
return false;
const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
if (!CE) return false;
@@ -392,130 +553,365 @@ public:
return Value >= 0 && Value < 8;
}
bool isImm0_15() const {
- if (Kind != Immediate)
+ if (Kind != k_Immediate)
return false;
const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
if (!CE) return false;
int64_t Value = CE->getValue();
return Value >= 0 && Value < 16;
}
+ bool isImm0_31() const {
+ if (Kind != k_Immediate)
+ return false;
+ const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
+ if (!CE) return false;
+ int64_t Value = CE->getValue();
+ return Value >= 0 && Value < 32;
+ }
+ bool isImm1_16() const {
+ if (Kind != k_Immediate)
+ return false;
+ const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
+ if (!CE) return false;
+ int64_t Value = CE->getValue();
+ return Value > 0 && Value < 17;
+ }
+ bool isImm1_32() const {
+ if (Kind != k_Immediate)
+ return false;
+ const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
+ if (!CE) return false;
+ int64_t Value = CE->getValue();
+ return Value > 0 && Value < 33;
+ }
bool isImm0_65535() const {
- if (Kind != Immediate)
+ if (Kind != k_Immediate)
return false;
const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
if (!CE) return false;
int64_t Value = CE->getValue();
return Value >= 0 && Value < 65536;
}
- bool isT2SOImm() const {
- if (Kind != Immediate)
+ bool isImm0_65535Expr() const {
+ if (Kind != k_Immediate)
return false;
const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
- if (!CE) return false;
+ // If it's not a constant expression, it'll generate a fixup and be
+ // handled later.
+ if (!CE) return true;
int64_t Value = CE->getValue();
- return ARM_AM::getT2SOImmVal(Value) != -1;
+ return Value >= 0 && Value < 65536;
}
- bool isReg() const { return Kind == Register; }
- bool isRegList() const { return Kind == RegisterList; }
- bool isDPRRegList() const { return Kind == DPRRegisterList; }
- bool isSPRRegList() const { return Kind == SPRRegisterList; }
- bool isToken() const { return Kind == Token; }
- bool isMemBarrierOpt() const { return Kind == MemBarrierOpt; }
- bool isMemory() const { return Kind == Memory; }
- bool isShifter() const { return Kind == Shifter; }
- bool isShiftedReg() const { return Kind == ShiftedRegister; }
- bool isMemMode2() const {
- if (getMemAddrMode() != ARMII::AddrMode2)
+ bool isImm24bit() const {
+ if (Kind != k_Immediate)
return false;
-
- if (getMemOffsetIsReg())
- return true;
-
- if (getMemNegative() &&
- !(getMemPostindexed() || getMemPreindexed()))
+ const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
+ if (!CE) return false;
+ int64_t Value = CE->getValue();
+ return Value >= 0 && Value <= 0xffffff;
+ }
+ bool isImmThumbSR() const {
+ if (Kind != k_Immediate)
return false;
-
- const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getMemOffset());
+ const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
if (!CE) return false;
int64_t Value = CE->getValue();
-
- // The offset must be in the range 0-4095 (imm12).
- if (Value > 4095 || Value < -4095)
+ return Value > 0 && Value < 33;
+ }
+ bool isPKHLSLImm() const {
+ if (Kind != k_Immediate)
return false;
-
- return true;
+ const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
+ if (!CE) return false;
+ int64_t Value = CE->getValue();
+ return Value >= 0 && Value < 32;
}
- bool isMemMode3() const {
- if (getMemAddrMode() != ARMII::AddrMode3)
+ bool isPKHASRImm() const {
+ if (Kind != k_Immediate)
return false;
-
- if (getMemOffsetIsReg()) {
- if (getMemOffsetRegShifted())
- return false; // No shift with offset reg allowed
- return true;
- }
-
- if (getMemNegative() &&
- !(getMemPostindexed() || getMemPreindexed()))
+ const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
+ if (!CE) return false;
+ int64_t Value = CE->getValue();
+ return Value > 0 && Value <= 32;
+ }
+ bool isARMSOImm() const {
+ if (Kind != k_Immediate)
return false;
-
- const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getMemOffset());
+ const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
if (!CE) return false;
int64_t Value = CE->getValue();
-
- // The offset must be in the range 0-255 (imm8).
- if (Value > 255 || Value < -255)
+ return ARM_AM::getSOImmVal(Value) != -1;
+ }
+ bool isT2SOImm() const {
+ if (Kind != k_Immediate)
return false;
-
- return true;
+ const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
+ if (!CE) return false;
+ int64_t Value = CE->getValue();
+ return ARM_AM::getT2SOImmVal(Value) != -1;
}
- bool isMemMode5() const {
- if (!isMemory() || getMemOffsetIsReg() || getMemWriteback() ||
- getMemNegative())
+ bool isSetEndImm() const {
+ if (Kind != k_Immediate)
return false;
-
- const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getMemOffset());
+ const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
if (!CE) return false;
-
- // The offset must be a multiple of 4 in the range 0-1020.
int64_t Value = CE->getValue();
- return ((Value & 0x3) == 0 && Value <= 1020 && Value >= -1020);
- }
- bool isMemMode7() const {
- if (!isMemory() ||
- getMemPreindexed() ||
- getMemPostindexed() ||
- getMemOffsetIsReg() ||
- getMemNegative() ||
- getMemWriteback())
+ return Value == 1 || Value == 0;
+ }
+ bool isReg() const { return Kind == k_Register; }
+ bool isRegList() const { return Kind == k_RegisterList; }
+ bool isDPRRegList() const { return Kind == k_DPRRegisterList; }
+ bool isSPRRegList() const { return Kind == k_SPRRegisterList; }
+ bool isToken() const { return Kind == k_Token; }
+ bool isMemBarrierOpt() const { return Kind == k_MemBarrierOpt; }
+ bool isMemory() const { return Kind == k_Memory; }
+ bool isShifterImm() const { return Kind == k_ShifterImmediate; }
+ bool isRegShiftedReg() const { return Kind == k_ShiftedRegister; }
+ bool isRegShiftedImm() const { return Kind == k_ShiftedImmediate; }
+ bool isRotImm() const { return Kind == k_RotateImmediate; }
+ bool isBitfield() const { return Kind == k_BitfieldDescriptor; }
+ bool isPostIdxRegShifted() const { return Kind == k_PostIndexRegister; }
+ bool isPostIdxReg() const {
+ return Kind == k_PostIndexRegister && PostIdxReg.ShiftTy == ARM_AM::no_shift;
+ }
+ bool isMemNoOffset(bool alignOK = false) const {
+ if (!isMemory())
return false;
-
- const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getMemOffset());
+ // No offset of any kind.
+ return Memory.OffsetRegNum == 0 && Memory.OffsetImm == 0 &&
+ (alignOK || Memory.Alignment == 0);
+ }
+ bool isAlignedMemory() const {
+ return isMemNoOffset(true);
+ }
+ bool isAddrMode2() const {
+ if (!isMemory() || Memory.Alignment != 0) return false;
+ // Check for register offset.
+ if (Memory.OffsetRegNum) return true;
+ // Immediate offset in range [-4095, 4095].
+ if (!Memory.OffsetImm) return true;
+ int64_t Val = Memory.OffsetImm->getValue();
+ return Val > -4096 && Val < 4096;
+ }
+ bool isAM2OffsetImm() const {
+ if (Kind != k_Immediate)
+ return false;
+ // Immediate offset in range [-4095, 4095].
+ const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
if (!CE) return false;
-
- if (CE->getValue())
+ int64_t Val = CE->getValue();
+ return Val > -4096 && Val < 4096;
+ }
+ bool isAddrMode3() const {
+ if (!isMemory() || Memory.Alignment != 0) return false;
+ // No shifts are legal for AM3.
+ if (Memory.ShiftType != ARM_AM::no_shift) return false;
+ // Check for register offset.
+ if (Memory.OffsetRegNum) return true;
+ // Immediate offset in range [-255, 255].
+ if (!Memory.OffsetImm) return true;
+ int64_t Val = Memory.OffsetImm->getValue();
+ return Val > -256 && Val < 256;
+ }
+ bool isAM3Offset() const {
+ if (Kind != k_Immediate && Kind != k_PostIndexRegister)
+ return false;
+ if (Kind == k_PostIndexRegister)
+ return PostIdxReg.ShiftTy == ARM_AM::no_shift;
+ // Immediate offset in range [-255, 255].
+ const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
+ if (!CE) return false;
+ int64_t Val = CE->getValue();
+ // Special case, #-0 is INT32_MIN.
+ return (Val > -256 && Val < 256) || Val == INT32_MIN;
+ }
+ bool isAddrMode5() const {
+ if (!isMemory() || Memory.Alignment != 0) return false;
+ // Check for register offset.
+ if (Memory.OffsetRegNum) return false;
+ // Immediate offset in range [-1020, 1020] and a multiple of 4.
+ if (!Memory.OffsetImm) return true;
+ int64_t Val = Memory.OffsetImm->getValue();
+ return (Val >= -1020 && Val <= 1020 && ((Val & 3) == 0)) ||
+ Val == INT32_MIN;
+ }
+ bool isMemTBB() const {
+ if (!isMemory() || !Memory.OffsetRegNum || Memory.isNegative ||
+ Memory.ShiftType != ARM_AM::no_shift || Memory.Alignment != 0)
+ return false;
+ return true;
+ }
+ bool isMemTBH() const {
+ if (!isMemory() || !Memory.OffsetRegNum || Memory.isNegative ||
+ Memory.ShiftType != ARM_AM::lsl || Memory.ShiftImm != 1 ||
+ Memory.Alignment != 0 )
+ return false;
+ return true;
+ }
+ bool isMemRegOffset() const {
+ if (!isMemory() || !Memory.OffsetRegNum || Memory.Alignment != 0)
return false;
-
return true;
}
- bool isMemModeRegThumb() const {
- if (!isMemory() || !getMemOffsetIsReg() || getMemWriteback())
+ bool isT2MemRegOffset() const {
+ if (!isMemory() || !Memory.OffsetRegNum || Memory.isNegative ||
+ Memory.Alignment != 0)
+ return false;
+ // Only lsl #{0, 1, 2, 3} allowed.
+ if (Memory.ShiftType == ARM_AM::no_shift)
+ return true;
+ if (Memory.ShiftType != ARM_AM::lsl || Memory.ShiftImm > 3)
return false;
return true;
}
- bool isMemModeImmThumb() const {
- if (!isMemory() || getMemOffsetIsReg() || getMemWriteback())
+ bool isMemThumbRR() const {
+ // Thumb reg+reg addressing is simple. Just two registers, a base and
+ // an offset. No shifts, negations or any other complicating factors.
+ if (!isMemory() || !Memory.OffsetRegNum || Memory.isNegative ||
+ Memory.ShiftType != ARM_AM::no_shift || Memory.Alignment != 0)
+ return false;
+ return isARMLowRegister(Memory.BaseRegNum) &&
+ (!Memory.OffsetRegNum || isARMLowRegister(Memory.OffsetRegNum));
+ }
+ bool isMemThumbRIs4() const {
+ if (!isMemory() || Memory.OffsetRegNum != 0 ||
+ !isARMLowRegister(Memory.BaseRegNum) || Memory.Alignment != 0)
+ return false;
+ // Immediate offset, multiple of 4 in range [0, 124].
+ if (!Memory.OffsetImm) return true;
+ int64_t Val = Memory.OffsetImm->getValue();
+ return Val >= 0 && Val <= 124 && (Val % 4) == 0;
+ }
+ bool isMemThumbRIs2() const {
+ if (!isMemory() || Memory.OffsetRegNum != 0 ||
+ !isARMLowRegister(Memory.BaseRegNum) || Memory.Alignment != 0)
+ return false;
+ // Immediate offset, multiple of 4 in range [0, 62].
+ if (!Memory.OffsetImm) return true;
+ int64_t Val = Memory.OffsetImm->getValue();
+ return Val >= 0 && Val <= 62 && (Val % 2) == 0;
+ }
+ bool isMemThumbRIs1() const {
+ if (!isMemory() || Memory.OffsetRegNum != 0 ||
+ !isARMLowRegister(Memory.BaseRegNum) || Memory.Alignment != 0)
+ return false;
+ // Immediate offset in range [0, 31].
+ if (!Memory.OffsetImm) return true;
+ int64_t Val = Memory.OffsetImm->getValue();
+ return Val >= 0 && Val <= 31;
+ }
+ bool isMemThumbSPI() const {
+ if (!isMemory() || Memory.OffsetRegNum != 0 ||
+ Memory.BaseRegNum != ARM::SP || Memory.Alignment != 0)
+ return false;
+ // Immediate offset, multiple of 4 in range [0, 1020].
+ if (!Memory.OffsetImm) return true;
+ int64_t Val = Memory.OffsetImm->getValue();
+ return Val >= 0 && Val <= 1020 && (Val % 4) == 0;
+ }
+ bool isMemImm8s4Offset() const {
+ if (!isMemory() || Memory.OffsetRegNum != 0 || Memory.Alignment != 0)
+ return false;
+ // Immediate offset a multiple of 4 in range [-1020, 1020].
+ if (!Memory.OffsetImm) return true;
+ int64_t Val = Memory.OffsetImm->getValue();
+ return Val >= -1020 && Val <= 1020 && (Val & 3) == 0;
+ }
+ bool isMemImm0_1020s4Offset() const {
+ if (!isMemory() || Memory.OffsetRegNum != 0 || Memory.Alignment != 0)
+ return false;
+ // Immediate offset a multiple of 4 in range [0, 1020].
+ if (!Memory.OffsetImm) return true;
+ int64_t Val = Memory.OffsetImm->getValue();
+ return Val >= 0 && Val <= 1020 && (Val & 3) == 0;
+ }
+ bool isMemImm8Offset() const {
+ if (!isMemory() || Memory.OffsetRegNum != 0 || Memory.Alignment != 0)
+ return false;
+ // Immediate offset in range [-255, 255].
+ if (!Memory.OffsetImm) return true;
+ int64_t Val = Memory.OffsetImm->getValue();
+ return (Val == INT32_MIN) || (Val > -256 && Val < 256);
+ }
+ bool isMemPosImm8Offset() const {
+ if (!isMemory() || Memory.OffsetRegNum != 0 || Memory.Alignment != 0)
+ return false;
+ // Immediate offset in range [0, 255].
+ if (!Memory.OffsetImm) return true;
+ int64_t Val = Memory.OffsetImm->getValue();
+ return Val >= 0 && Val < 256;
+ }
+ bool isMemNegImm8Offset() const {
+ if (!isMemory() || Memory.OffsetRegNum != 0 || Memory.Alignment != 0)
+ return false;
+ // Immediate offset in range [-255, -1].
+ if (!Memory.OffsetImm) return true;
+ int64_t Val = Memory.OffsetImm->getValue();
+ return Val > -256 && Val < 0;
+ }
+ bool isMemUImm12Offset() const {
+ // If we have an immediate that's not a constant, treat it as a label
+ // reference needing a fixup. If it is a constant, it's something else
+ // and we reject it.
+ if (Kind == k_Immediate && !isa<MCConstantExpr>(getImm()))
+ return true;
+
+ if (!isMemory() || Memory.OffsetRegNum != 0 || Memory.Alignment != 0)
return false;
+ // Immediate offset in range [0, 4095].
+ if (!Memory.OffsetImm) return true;
+ int64_t Val = Memory.OffsetImm->getValue();
+ return (Val >= 0 && Val < 4096);
+ }
+ bool isMemImm12Offset() const {
+ // If we have an immediate that's not a constant, treat it as a label
+ // reference needing a fixup. If it is a constant, it's something else
+ // and we reject it.
+ if (Kind == k_Immediate && !isa<MCConstantExpr>(getImm()))
+ return true;
- const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getMemOffset());
+ if (!isMemory() || Memory.OffsetRegNum != 0 || Memory.Alignment != 0)
+ return false;
+ // Immediate offset in range [-4095, 4095].
+ if (!Memory.OffsetImm) return true;
+ int64_t Val = Memory.OffsetImm->getValue();
+ return (Val > -4096 && Val < 4096) || (Val == INT32_MIN);
+ }
+ bool isPostIdxImm8() const {
+ if (Kind != k_Immediate)
+ return false;
+ const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
+ if (!CE) return false;
+ int64_t Val = CE->getValue();
+ return (Val > -256 && Val < 256) || (Val == INT32_MIN);
+ }
+ bool isPostIdxImm8s4() const {
+ if (Kind != k_Immediate)
+ return false;
+ const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
if (!CE) return false;
+ int64_t Val = CE->getValue();
+ return ((Val & 3) == 0 && Val >= -1020 && Val <= 1020) ||
+ (Val == INT32_MIN);
+ }
- // The offset must be a multiple of 4 in the range 0-124.
- uint64_t Value = CE->getValue();
- return ((Value & 0x3) == 0 && Value <= 124);
+ bool isMSRMask() const { return Kind == k_MSRMask; }
+ bool isProcIFlags() const { return Kind == k_ProcIFlags; }
+
+ bool isVectorIndex8() const {
+ if (Kind != k_VectorIndex) return false;
+ return VectorIndex.Val < 8;
+ }
+ bool isVectorIndex16() const {
+ if (Kind != k_VectorIndex) return false;
+ return VectorIndex.Val < 4;
}
- bool isMSRMask() const { return Kind == MSRMask; }
- bool isProcIFlags() const { return Kind == ProcIFlags; }
+ bool isVectorIndex32() const {
+ if (Kind != k_VectorIndex) return false;
+ return VectorIndex.Val < 2;
+ }
+
+
void addExpr(MCInst &Inst, const MCExpr *Expr) const {
// Add as immediates when possible. Null MCExpr = 0.
@@ -544,6 +940,21 @@ public:
Inst.addOperand(MCOperand::CreateImm(getCoproc()));
}
+ void addCoprocOptionOperands(MCInst &Inst, unsigned N) const {
+ assert(N == 1 && "Invalid number of operands!");
+ Inst.addOperand(MCOperand::CreateImm(CoprocOption.Val));
+ }
+
+ void addITMaskOperands(MCInst &Inst, unsigned N) const {
+ assert(N == 1 && "Invalid number of operands!");
+ Inst.addOperand(MCOperand::CreateImm(ITMask.Mask));
+ }
+
+ void addITCondCodeOperands(MCInst &Inst, unsigned N) const {
+ assert(N == 1 && "Invalid number of operands!");
+ Inst.addOperand(MCOperand::CreateImm(unsigned(getCondCode())));
+ }
+
void addCCOutOperands(MCInst &Inst, unsigned N) const {
assert(N == 1 && "Invalid number of operands!");
Inst.addOperand(MCOperand::CreateReg(getReg()));
@@ -554,22 +965,27 @@ public:
Inst.addOperand(MCOperand::CreateReg(getReg()));
}
- void addShiftedRegOperands(MCInst &Inst, unsigned N) const {
+ void addRegShiftedRegOperands(MCInst &Inst, unsigned N) const {
assert(N == 3 && "Invalid number of operands!");
- assert(isShiftedReg() && "addShiftedRegOperands() on non ShiftedReg!");
- assert((ShiftedReg.ShiftReg == 0 ||
- ARM_AM::getSORegOffset(ShiftedReg.ShiftImm) == 0) &&
- "Invalid shifted register operand!");
- Inst.addOperand(MCOperand::CreateReg(ShiftedReg.SrcReg));
- Inst.addOperand(MCOperand::CreateReg(ShiftedReg.ShiftReg));
+ assert(isRegShiftedReg() && "addRegShiftedRegOperands() on non RegShiftedReg!");
+ Inst.addOperand(MCOperand::CreateReg(RegShiftedReg.SrcReg));
+ Inst.addOperand(MCOperand::CreateReg(RegShiftedReg.ShiftReg));
Inst.addOperand(MCOperand::CreateImm(
- ARM_AM::getSORegOpc(ShiftedReg.ShiftTy, ShiftedReg.ShiftImm)));
+ ARM_AM::getSORegOpc(RegShiftedReg.ShiftTy, RegShiftedReg.ShiftImm)));
}
- void addShifterOperands(MCInst &Inst, unsigned N) const {
- assert(N == 1 && "Invalid number of operands!");
+ void addRegShiftedImmOperands(MCInst &Inst, unsigned N) const {
+ assert(N == 2 && "Invalid number of operands!");
+ assert(isRegShiftedImm() && "addRegShiftedImmOperands() on non RegShiftedImm!");
+ Inst.addOperand(MCOperand::CreateReg(RegShiftedImm.SrcReg));
Inst.addOperand(MCOperand::CreateImm(
- ARM_AM::getSORegOpc(Shift.ShiftTy, 0)));
+ ARM_AM::getSORegOpc(RegShiftedImm.ShiftTy, RegShiftedImm.ShiftImm)));
+ }
+
+ void addShifterImmOperands(MCInst &Inst, unsigned N) const {
+ assert(N == 1 && "Invalid number of operands!");
+ Inst.addOperand(MCOperand::CreateImm((ShifterImm.isASR << 5) |
+ ShifterImm.Imm));
}
void addRegListOperands(MCInst &Inst, unsigned N) const {
@@ -588,11 +1004,57 @@ public:
addRegListOperands(Inst, N);
}
+ void addRotImmOperands(MCInst &Inst, unsigned N) const {
+ assert(N == 1 && "Invalid number of operands!");
+ // Encoded as val>>3. The printer handles display as 8, 16, 24.
+ Inst.addOperand(MCOperand::CreateImm(RotImm.Imm >> 3));
+ }
+
+ void addBitfieldOperands(MCInst &Inst, unsigned N) const {
+ assert(N == 1 && "Invalid number of operands!");
+ // Munge the lsb/width into a bitfield mask.
+ unsigned lsb = Bitfield.LSB;
+ unsigned width = Bitfield.Width;
+ // Make a 32-bit mask w/ the referenced bits clear and all other bits set.
+ uint32_t Mask = ~(((uint32_t)0xffffffff >> lsb) << (32 - width) >>
+ (32 - (lsb + width)));
+ Inst.addOperand(MCOperand::CreateImm(Mask));
+ }
+
void addImmOperands(MCInst &Inst, unsigned N) const {
assert(N == 1 && "Invalid number of operands!");
addExpr(Inst, getImm());
}
+ void addFPImmOperands(MCInst &Inst, unsigned N) const {
+ assert(N == 1 && "Invalid number of operands!");
+ Inst.addOperand(MCOperand::CreateImm(getFPImm()));
+ }
+
+ void addImm8s4Operands(MCInst &Inst, unsigned N) const {
+ assert(N == 1 && "Invalid number of operands!");
+ // FIXME: We really want to scale the value here, but the LDRD/STRD
+ // instruction don't encode operands that way yet.
+ const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
+ Inst.addOperand(MCOperand::CreateImm(CE->getValue()));
+ }
+
+ void addImm0_1020s4Operands(MCInst &Inst, unsigned N) const {
+ assert(N == 1 && "Invalid number of operands!");
+ // The immediate is scaled by four in the encoding and is stored
+ // in the MCInst as such. Lop off the low two bits here.
+ const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
+ Inst.addOperand(MCOperand::CreateImm(CE->getValue() / 4));
+ }
+
+ void addImm0_508s4Operands(MCInst &Inst, unsigned N) const {
+ assert(N == 1 && "Invalid number of operands!");
+ // The immediate is scaled by four in the encoding and is stored
+ // in the MCInst as such. Lop off the low two bits here.
+ const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
+ Inst.addOperand(MCOperand::CreateImm(CE->getValue() / 4));
+ }
+
void addImm0_255Operands(MCInst &Inst, unsigned N) const {
assert(N == 1 && "Invalid number of operands!");
addExpr(Inst, getImm());
@@ -608,137 +1070,344 @@ public:
addExpr(Inst, getImm());
}
+ void addImm0_31Operands(MCInst &Inst, unsigned N) const {
+ assert(N == 1 && "Invalid number of operands!");
+ addExpr(Inst, getImm());
+ }
+
+ void addImm1_16Operands(MCInst &Inst, unsigned N) const {
+ assert(N == 1 && "Invalid number of operands!");
+ // The constant encodes as the immediate-1, and we store in the instruction
+ // the bits as encoded, so subtract off one here.
+ const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
+ Inst.addOperand(MCOperand::CreateImm(CE->getValue() - 1));
+ }
+
+ void addImm1_32Operands(MCInst &Inst, unsigned N) const {
+ assert(N == 1 && "Invalid number of operands!");
+ // The constant encodes as the immediate-1, and we store in the instruction
+ // the bits as encoded, so subtract off one here.
+ const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
+ Inst.addOperand(MCOperand::CreateImm(CE->getValue() - 1));
+ }
+
void addImm0_65535Operands(MCInst &Inst, unsigned N) const {
assert(N == 1 && "Invalid number of operands!");
addExpr(Inst, getImm());
}
+ void addImm0_65535ExprOperands(MCInst &Inst, unsigned N) const {
+ assert(N == 1 && "Invalid number of operands!");
+ addExpr(Inst, getImm());
+ }
+
+ void addImm24bitOperands(MCInst &Inst, unsigned N) const {
+ assert(N == 1 && "Invalid number of operands!");
+ addExpr(Inst, getImm());
+ }
+
+ void addImmThumbSROperands(MCInst &Inst, unsigned N) const {
+ assert(N == 1 && "Invalid number of operands!");
+ // The constant encodes as the immediate, except for 32, which encodes as
+ // zero.
+ const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
+ unsigned Imm = CE->getValue();
+ Inst.addOperand(MCOperand::CreateImm((Imm == 32 ? 0 : Imm)));
+ }
+
+ void addPKHLSLImmOperands(MCInst &Inst, unsigned N) const {
+ assert(N == 1 && "Invalid number of operands!");
+ addExpr(Inst, getImm());
+ }
+
+ void addPKHASRImmOperands(MCInst &Inst, unsigned N) const {
+ assert(N == 1 && "Invalid number of operands!");
+ // An ASR value of 32 encodes as 0, so that's how we want to add it to
+ // the instruction as well.
+ const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
+ int Val = CE->getValue();
+ Inst.addOperand(MCOperand::CreateImm(Val == 32 ? 0 : Val));
+ }
+
+ void addARMSOImmOperands(MCInst &Inst, unsigned N) const {
+ assert(N == 1 && "Invalid number of operands!");
+ addExpr(Inst, getImm());
+ }
+
void addT2SOImmOperands(MCInst &Inst, unsigned N) const {
assert(N == 1 && "Invalid number of operands!");
addExpr(Inst, getImm());
}
+ void addSetEndImmOperands(MCInst &Inst, unsigned N) const {
+ assert(N == 1 && "Invalid number of operands!");
+ addExpr(Inst, getImm());
+ }
+
void addMemBarrierOptOperands(MCInst &Inst, unsigned N) const {
assert(N == 1 && "Invalid number of operands!");
Inst.addOperand(MCOperand::CreateImm(unsigned(getMemBarrierOpt())));
}
- void addMemMode7Operands(MCInst &Inst, unsigned N) const {
- assert(N == 1 && isMemMode7() && "Invalid number of operands!");
- Inst.addOperand(MCOperand::CreateReg(getMemBaseRegNum()));
-
- const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getMemOffset());
- (void)CE;
- assert((CE || CE->getValue() == 0) &&
- "No offset operand support in mode 7");
+ void addMemNoOffsetOperands(MCInst &Inst, unsigned N) const {
+ assert(N == 1 && "Invalid number of operands!");
+ Inst.addOperand(MCOperand::CreateReg(Memory.BaseRegNum));
}
- void addMemMode2Operands(MCInst &Inst, unsigned N) const {
- assert(isMemMode2() && "Invalid mode or number of operands!");
- Inst.addOperand(MCOperand::CreateReg(getMemBaseRegNum()));
- unsigned IdxMode = (getMemPreindexed() | getMemPostindexed() << 1);
+ void addAlignedMemoryOperands(MCInst &Inst, unsigned N) const {
+ assert(N == 2 && "Invalid number of operands!");
+ Inst.addOperand(MCOperand::CreateReg(Memory.BaseRegNum));
+ Inst.addOperand(MCOperand::CreateImm(Memory.Alignment));
+ }
- if (getMemOffsetIsReg()) {
- Inst.addOperand(MCOperand::CreateReg(getMemOffsetRegNum()));
+ void addAddrMode2Operands(MCInst &Inst, unsigned N) const {
+ assert(N == 3 && "Invalid number of operands!");
+ int32_t Val = Memory.OffsetImm ? Memory.OffsetImm->getValue() : 0;
+ if (!Memory.OffsetRegNum) {
+ ARM_AM::AddrOpc AddSub = Val < 0 ? ARM_AM::sub : ARM_AM::add;
+ // Special case for #-0
+ if (Val == INT32_MIN) Val = 0;
+ if (Val < 0) Val = -Val;
+ Val = ARM_AM::getAM2Opc(AddSub, Val, ARM_AM::no_shift);
+ } else {
+ // For register offset, we encode the shift type and negation flag
+ // here.
+ Val = ARM_AM::getAM2Opc(Memory.isNegative ? ARM_AM::sub : ARM_AM::add,
+ Memory.ShiftImm, Memory.ShiftType);
+ }
+ Inst.addOperand(MCOperand::CreateReg(Memory.BaseRegNum));
+ Inst.addOperand(MCOperand::CreateReg(Memory.OffsetRegNum));
+ Inst.addOperand(MCOperand::CreateImm(Val));
+ }
- ARM_AM::AddrOpc AMOpc = getMemNegative() ? ARM_AM::sub : ARM_AM::add;
- ARM_AM::ShiftOpc ShOpc = ARM_AM::no_shift;
- int64_t ShiftAmount = 0;
+ void addAM2OffsetImmOperands(MCInst &Inst, unsigned N) const {
+ assert(N == 2 && "Invalid number of operands!");
+ const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
+ assert(CE && "non-constant AM2OffsetImm operand!");
+ int32_t Val = CE->getValue();
+ ARM_AM::AddrOpc AddSub = Val < 0 ? ARM_AM::sub : ARM_AM::add;
+ // Special case for #-0
+ if (Val == INT32_MIN) Val = 0;
+ if (Val < 0) Val = -Val;
+ Val = ARM_AM::getAM2Opc(AddSub, Val, ARM_AM::no_shift);
+ Inst.addOperand(MCOperand::CreateReg(0));
+ Inst.addOperand(MCOperand::CreateImm(Val));
+ }
- if (getMemOffsetRegShifted()) {
- ShOpc = getMemShiftType();
- const MCConstantExpr *CE =
- dyn_cast<MCConstantExpr>(getMemShiftAmount());
- ShiftAmount = CE->getValue();
- }
+ void addAddrMode3Operands(MCInst &Inst, unsigned N) const {
+ assert(N == 3 && "Invalid number of operands!");
+ int32_t Val = Memory.OffsetImm ? Memory.OffsetImm->getValue() : 0;
+ if (!Memory.OffsetRegNum) {
+ ARM_AM::AddrOpc AddSub = Val < 0 ? ARM_AM::sub : ARM_AM::add;
+ // Special case for #-0
+ if (Val == INT32_MIN) Val = 0;
+ if (Val < 0) Val = -Val;
+ Val = ARM_AM::getAM3Opc(AddSub, Val);
+ } else {
+ // For register offset, we encode the shift type and negation flag
+ // here.
+ Val = ARM_AM::getAM3Opc(Memory.isNegative ? ARM_AM::sub : ARM_AM::add, 0);
+ }
+ Inst.addOperand(MCOperand::CreateReg(Memory.BaseRegNum));
+ Inst.addOperand(MCOperand::CreateReg(Memory.OffsetRegNum));
+ Inst.addOperand(MCOperand::CreateImm(Val));
+ }
- Inst.addOperand(MCOperand::CreateImm(ARM_AM::getAM2Opc(AMOpc, ShiftAmount,
- ShOpc, IdxMode)));
+ void addAM3OffsetOperands(MCInst &Inst, unsigned N) const {
+ assert(N == 2 && "Invalid number of operands!");
+ if (Kind == k_PostIndexRegister) {
+ int32_t Val =
+ ARM_AM::getAM3Opc(PostIdxReg.isAdd ? ARM_AM::add : ARM_AM::sub, 0);
+ Inst.addOperand(MCOperand::CreateReg(PostIdxReg.RegNum));
+ Inst.addOperand(MCOperand::CreateImm(Val));
return;
}
- // Create a operand placeholder to always yield the same number of operands.
+ // Constant offset.
+ const MCConstantExpr *CE = static_cast<const MCConstantExpr*>(getImm());
+ int32_t Val = CE->getValue();
+ ARM_AM::AddrOpc AddSub = Val < 0 ? ARM_AM::sub : ARM_AM::add;
+ // Special case for #-0
+ if (Val == INT32_MIN) Val = 0;
+ if (Val < 0) Val = -Val;
+ Val = ARM_AM::getAM3Opc(AddSub, Val);
Inst.addOperand(MCOperand::CreateReg(0));
+ Inst.addOperand(MCOperand::CreateImm(Val));
+ }
- // FIXME: #-0 is encoded differently than #0. Does the parser preserve
- // the difference?
- const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getMemOffset());
- assert(CE && "Non-constant mode 2 offset operand!");
- int64_t Offset = CE->getValue();
+ void addAddrMode5Operands(MCInst &Inst, unsigned N) const {
+ assert(N == 2 && "Invalid number of operands!");
+ // The lower two bits are always zero and as such are not encoded.
+ int32_t Val = Memory.OffsetImm ? Memory.OffsetImm->getValue() / 4 : 0;
+ ARM_AM::AddrOpc AddSub = Val < 0 ? ARM_AM::sub : ARM_AM::add;
+ // Special case for #-0
+ if (Val == INT32_MIN) Val = 0;
+ if (Val < 0) Val = -Val;
+ Val = ARM_AM::getAM5Opc(AddSub, Val);
+ Inst.addOperand(MCOperand::CreateReg(Memory.BaseRegNum));
+ Inst.addOperand(MCOperand::CreateImm(Val));
+ }
- if (Offset >= 0)
- Inst.addOperand(MCOperand::CreateImm(ARM_AM::getAM2Opc(ARM_AM::add,
- Offset, ARM_AM::no_shift, IdxMode)));
- else
- Inst.addOperand(MCOperand::CreateImm(ARM_AM::getAM2Opc(ARM_AM::sub,
- -Offset, ARM_AM::no_shift, IdxMode)));
+ void addMemImm8s4OffsetOperands(MCInst &Inst, unsigned N) const {
+ assert(N == 2 && "Invalid number of operands!");
+ int64_t Val = Memory.OffsetImm ? Memory.OffsetImm->getValue() : 0;
+ Inst.addOperand(MCOperand::CreateReg(Memory.BaseRegNum));
+ Inst.addOperand(MCOperand::CreateImm(Val));
+ }
+
+ void addMemImm0_1020s4OffsetOperands(MCInst &Inst, unsigned N) const {
+ assert(N == 2 && "Invalid number of operands!");
+ // The lower two bits are always zero and as such are not encoded.
+ int32_t Val = Memory.OffsetImm ? Memory.OffsetImm->getValue() / 4 : 0;
+ Inst.addOperand(MCOperand::CreateReg(Memory.BaseRegNum));
+ Inst.addOperand(MCOperand::CreateImm(Val));
+ }
+
+ void addMemImm8OffsetOperands(MCInst &Inst, unsigned N) const {
+ assert(N == 2 && "Invalid number of operands!");
+ int64_t Val = Memory.OffsetImm ? Memory.OffsetImm->getValue() : 0;
+ Inst.addOperand(MCOperand::CreateReg(Memory.BaseRegNum));
+ Inst.addOperand(MCOperand::CreateImm(Val));
+ }
+
+ void addMemPosImm8OffsetOperands(MCInst &Inst, unsigned N) const {
+ addMemImm8OffsetOperands(Inst, N);
+ }
+
+ void addMemNegImm8OffsetOperands(MCInst &Inst, unsigned N) const {
+ addMemImm8OffsetOperands(Inst, N);
}
- void addMemMode3Operands(MCInst &Inst, unsigned N) const {
- assert(isMemMode3() && "Invalid mode or number of operands!");
- Inst.addOperand(MCOperand::CreateReg(getMemBaseRegNum()));
- unsigned IdxMode = (getMemPreindexed() | getMemPostindexed() << 1);
+ void addMemUImm12OffsetOperands(MCInst &Inst, unsigned N) const {
+ assert(N == 2 && "Invalid number of operands!");
+ // If this is an immediate, it's a label reference.
+ if (Kind == k_Immediate) {
+ addExpr(Inst, getImm());
+ Inst.addOperand(MCOperand::CreateImm(0));
+ return;
+ }
- if (getMemOffsetIsReg()) {
- Inst.addOperand(MCOperand::CreateReg(getMemOffsetRegNum()));
+ // Otherwise, it's a normal memory reg+offset.
+ int64_t Val = Memory.OffsetImm ? Memory.OffsetImm->getValue() : 0;
+ Inst.addOperand(MCOperand::CreateReg(Memory.BaseRegNum));
+ Inst.addOperand(MCOperand::CreateImm(Val));
+ }
- ARM_AM::AddrOpc AMOpc = getMemNegative() ? ARM_AM::sub : ARM_AM::add;
- Inst.addOperand(MCOperand::CreateImm(ARM_AM::getAM3Opc(AMOpc, 0,
- IdxMode)));
+ void addMemImm12OffsetOperands(MCInst &Inst, unsigned N) const {
+ assert(N == 2 && "Invalid number of operands!");
+ // If this is an immediate, it's a label reference.
+ if (Kind == k_Immediate) {
+ addExpr(Inst, getImm());
+ Inst.addOperand(MCOperand::CreateImm(0));
return;
}
- // Create a operand placeholder to always yield the same number of operands.
- Inst.addOperand(MCOperand::CreateReg(0));
+ // Otherwise, it's a normal memory reg+offset.
+ int64_t Val = Memory.OffsetImm ? Memory.OffsetImm->getValue() : 0;
+ Inst.addOperand(MCOperand::CreateReg(Memory.BaseRegNum));
+ Inst.addOperand(MCOperand::CreateImm(Val));
+ }
- // FIXME: #-0 is encoded differently than #0. Does the parser preserve
- // the difference?
- const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getMemOffset());
- assert(CE && "Non-constant mode 3 offset operand!");
- int64_t Offset = CE->getValue();
+ void addMemTBBOperands(MCInst &Inst, unsigned N) const {
+ assert(N == 2 && "Invalid number of operands!");
+ Inst.addOperand(MCOperand::CreateReg(Memory.BaseRegNum));
+ Inst.addOperand(MCOperand::CreateReg(Memory.OffsetRegNum));
+ }
- if (Offset >= 0)
- Inst.addOperand(MCOperand::CreateImm(ARM_AM::getAM3Opc(ARM_AM::add,
- Offset, IdxMode)));
- else
- Inst.addOperand(MCOperand::CreateImm(ARM_AM::getAM3Opc(ARM_AM::sub,
- -Offset, IdxMode)));
+ void addMemTBHOperands(MCInst &Inst, unsigned N) const {
+ assert(N == 2 && "Invalid number of operands!");
+ Inst.addOperand(MCOperand::CreateReg(Memory.BaseRegNum));
+ Inst.addOperand(MCOperand::CreateReg(Memory.OffsetRegNum));
}
- void addMemMode5Operands(MCInst &Inst, unsigned N) const {
- assert(N == 2 && isMemMode5() && "Invalid number of operands!");
+ void addMemRegOffsetOperands(MCInst &Inst, unsigned N) const {
+ assert(N == 3 && "Invalid number of operands!");
+ unsigned Val = ARM_AM::getAM2Opc(Memory.isNegative ? ARM_AM::sub : ARM_AM::add,
+ Memory.ShiftImm, Memory.ShiftType);
+ Inst.addOperand(MCOperand::CreateReg(Memory.BaseRegNum));
+ Inst.addOperand(MCOperand::CreateReg(Memory.OffsetRegNum));
+ Inst.addOperand(MCOperand::CreateImm(Val));
+ }
- Inst.addOperand(MCOperand::CreateReg(getMemBaseRegNum()));
- assert(!getMemOffsetIsReg() && "Invalid mode 5 operand");
+ void addT2MemRegOffsetOperands(MCInst &Inst, unsigned N) const {
+ assert(N == 3 && "Invalid number of operands!");
+ Inst.addOperand(MCOperand::CreateReg(Memory.BaseRegNum));
+ Inst.addOperand(MCOperand::CreateReg(Memory.OffsetRegNum));
+ Inst.addOperand(MCOperand::CreateImm(Memory.ShiftImm));
+ }
- // FIXME: #-0 is encoded differently than #0. Does the parser preserve
- // the difference?
- const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getMemOffset());
- assert(CE && "Non-constant mode 5 offset operand!");
+ void addMemThumbRROperands(MCInst &Inst, unsigned N) const {
+ assert(N == 2 && "Invalid number of operands!");
+ Inst.addOperand(MCOperand::CreateReg(Memory.BaseRegNum));
+ Inst.addOperand(MCOperand::CreateReg(Memory.OffsetRegNum));
+ }
- // The MCInst offset operand doesn't include the low two bits (like
- // the instruction encoding).
- int64_t Offset = CE->getValue() / 4;
- if (Offset >= 0)
- Inst.addOperand(MCOperand::CreateImm(ARM_AM::getAM5Opc(ARM_AM::add,
- Offset)));
- else
- Inst.addOperand(MCOperand::CreateImm(ARM_AM::getAM5Opc(ARM_AM::sub,
- -Offset)));
+ void addMemThumbRIs4Operands(MCInst &Inst, unsigned N) const {
+ assert(N == 2 && "Invalid number of operands!");
+ int64_t Val = Memory.OffsetImm ? (Memory.OffsetImm->getValue() / 4) : 0;
+ Inst.addOperand(MCOperand::CreateReg(Memory.BaseRegNum));
+ Inst.addOperand(MCOperand::CreateImm(Val));
}
- void addMemModeRegThumbOperands(MCInst &Inst, unsigned N) const {
- assert(N == 2 && isMemModeRegThumb() && "Invalid number of operands!");
- Inst.addOperand(MCOperand::CreateReg(getMemBaseRegNum()));
- Inst.addOperand(MCOperand::CreateReg(getMemOffsetRegNum()));
+ void addMemThumbRIs2Operands(MCInst &Inst, unsigned N) const {
+ assert(N == 2 && "Invalid number of operands!");
+ int64_t Val = Memory.OffsetImm ? (Memory.OffsetImm->getValue() / 2) : 0;
+ Inst.addOperand(MCOperand::CreateReg(Memory.BaseRegNum));
+ Inst.addOperand(MCOperand::CreateImm(Val));
}
- void addMemModeImmThumbOperands(MCInst &Inst, unsigned N) const {
- assert(N == 2 && isMemModeImmThumb() && "Invalid number of operands!");
- Inst.addOperand(MCOperand::CreateReg(getMemBaseRegNum()));
- const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getMemOffset());
- assert(CE && "Non-constant mode offset operand!");
- Inst.addOperand(MCOperand::CreateImm(CE->getValue()));
+ void addMemThumbRIs1Operands(MCInst &Inst, unsigned N) const {
+ assert(N == 2 && "Invalid number of operands!");
+ int64_t Val = Memory.OffsetImm ? (Memory.OffsetImm->getValue()) : 0;
+ Inst.addOperand(MCOperand::CreateReg(Memory.BaseRegNum));
+ Inst.addOperand(MCOperand::CreateImm(Val));
+ }
+
+ void addMemThumbSPIOperands(MCInst &Inst, unsigned N) const {
+ assert(N == 2 && "Invalid number of operands!");
+ int64_t Val = Memory.OffsetImm ? (Memory.OffsetImm->getValue() / 4) : 0;
+ Inst.addOperand(MCOperand::CreateReg(Memory.BaseRegNum));
+ Inst.addOperand(MCOperand::CreateImm(Val));
+ }
+
+ void addPostIdxImm8Operands(MCInst &Inst, unsigned N) const {
+ assert(N == 1 && "Invalid number of operands!");
+ const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
+ assert(CE && "non-constant post-idx-imm8 operand!");
+ int Imm = CE->getValue();
+ bool isAdd = Imm >= 0;
+ if (Imm == INT32_MIN) Imm = 0;
+ Imm = (Imm < 0 ? -Imm : Imm) | (int)isAdd << 8;
+ Inst.addOperand(MCOperand::CreateImm(Imm));
+ }
+
+ void addPostIdxImm8s4Operands(MCInst &Inst, unsigned N) const {
+ assert(N == 1 && "Invalid number of operands!");
+ const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
+ assert(CE && "non-constant post-idx-imm8s4 operand!");
+ int Imm = CE->getValue();
+ bool isAdd = Imm >= 0;
+ if (Imm == INT32_MIN) Imm = 0;
+ // Immediate is scaled by 4.
+ Imm = ((Imm < 0 ? -Imm : Imm) / 4) | (int)isAdd << 8;
+ Inst.addOperand(MCOperand::CreateImm(Imm));
+ }
+
+ void addPostIdxRegOperands(MCInst &Inst, unsigned N) const {
+ assert(N == 2 && "Invalid number of operands!");
+ Inst.addOperand(MCOperand::CreateReg(PostIdxReg.RegNum));
+ Inst.addOperand(MCOperand::CreateImm(PostIdxReg.isAdd));
+ }
+
+ void addPostIdxRegShiftedOperands(MCInst &Inst, unsigned N) const {
+ assert(N == 2 && "Invalid number of operands!");
+ Inst.addOperand(MCOperand::CreateReg(PostIdxReg.RegNum));
+ // The sign, shift type, and shift amount are encoded in a single operand
+ // using the AM2 encoding helpers.
+ ARM_AM::AddrOpc opc = PostIdxReg.isAdd ? ARM_AM::add : ARM_AM::sub;
+ unsigned Imm = ARM_AM::getAM2Opc(opc, PostIdxReg.ShiftImm,
+ PostIdxReg.ShiftTy);
+ Inst.addOperand(MCOperand::CreateImm(Imm));
}
void addMSRMaskOperands(MCInst &Inst, unsigned N) const {
@@ -751,10 +1420,33 @@ public:
Inst.addOperand(MCOperand::CreateImm(unsigned(getProcIFlags())));
}
+ void addVectorIndex8Operands(MCInst &Inst, unsigned N) const {
+ assert(N == 1 && "Invalid number of operands!");
+ Inst.addOperand(MCOperand::CreateImm(getVectorIndex()));
+ }
+
+ void addVectorIndex16Operands(MCInst &Inst, unsigned N) const {
+ assert(N == 1 && "Invalid number of operands!");
+ Inst.addOperand(MCOperand::CreateImm(getVectorIndex()));
+ }
+
+ void addVectorIndex32Operands(MCInst &Inst, unsigned N) const {
+ assert(N == 1 && "Invalid number of operands!");
+ Inst.addOperand(MCOperand::CreateImm(getVectorIndex()));
+ }
+
virtual void print(raw_ostream &OS) const;
+ static ARMOperand *CreateITMask(unsigned Mask, SMLoc S) {
+ ARMOperand *Op = new ARMOperand(k_ITCondMask);
+ Op->ITMask.Mask = Mask;
+ Op->StartLoc = S;
+ Op->EndLoc = S;
+ return Op;
+ }
+
static ARMOperand *CreateCondCode(ARMCC::CondCodes CC, SMLoc S) {
- ARMOperand *Op = new ARMOperand(CondCode);
+ ARMOperand *Op = new ARMOperand(k_CondCode);
Op->CC.Val = CC;
Op->StartLoc = S;
Op->EndLoc = S;
@@ -762,7 +1454,7 @@ public:
}
static ARMOperand *CreateCoprocNum(unsigned CopVal, SMLoc S) {
- ARMOperand *Op = new ARMOperand(CoprocNum);
+ ARMOperand *Op = new ARMOperand(k_CoprocNum);
Op->Cop.Val = CopVal;
Op->StartLoc = S;
Op->EndLoc = S;
@@ -770,15 +1462,23 @@ public:
}
static ARMOperand *CreateCoprocReg(unsigned CopVal, SMLoc S) {
- ARMOperand *Op = new ARMOperand(CoprocReg);
+ ARMOperand *Op = new ARMOperand(k_CoprocReg);
Op->Cop.Val = CopVal;
Op->StartLoc = S;
Op->EndLoc = S;
return Op;
}
+ static ARMOperand *CreateCoprocOption(unsigned Val, SMLoc S, SMLoc E) {
+ ARMOperand *Op = new ARMOperand(k_CoprocOption);
+ Op->Cop.Val = Val;
+ Op->StartLoc = S;
+ Op->EndLoc = E;
+ return Op;
+ }
+
static ARMOperand *CreateCCOut(unsigned RegNum, SMLoc S) {
- ARMOperand *Op = new ARMOperand(CCOut);
+ ARMOperand *Op = new ARMOperand(k_CCOut);
Op->Reg.RegNum = RegNum;
Op->StartLoc = S;
Op->EndLoc = S;
@@ -786,7 +1486,7 @@ public:
}
static ARMOperand *CreateToken(StringRef Str, SMLoc S) {
- ARMOperand *Op = new ARMOperand(Token);
+ ARMOperand *Op = new ARMOperand(k_Token);
Op->Tok.Data = Str.data();
Op->Tok.Length = Str.size();
Op->StartLoc = S;
@@ -795,7 +1495,7 @@ public:
}
static ARMOperand *CreateReg(unsigned RegNum, SMLoc S, SMLoc E) {
- ARMOperand *Op = new ARMOperand(Register);
+ ARMOperand *Op = new ARMOperand(k_Register);
Op->Reg.RegNum = RegNum;
Op->StartLoc = S;
Op->EndLoc = E;
@@ -807,20 +1507,52 @@ public:
unsigned ShiftReg,
unsigned ShiftImm,
SMLoc S, SMLoc E) {
- ARMOperand *Op = new ARMOperand(ShiftedRegister);
- Op->ShiftedReg.ShiftTy = ShTy;
- Op->ShiftedReg.SrcReg = SrcReg;
- Op->ShiftedReg.ShiftReg = ShiftReg;
- Op->ShiftedReg.ShiftImm = ShiftImm;
+ ARMOperand *Op = new ARMOperand(k_ShiftedRegister);
+ Op->RegShiftedReg.ShiftTy = ShTy;
+ Op->RegShiftedReg.SrcReg = SrcReg;
+ Op->RegShiftedReg.ShiftReg = ShiftReg;
+ Op->RegShiftedReg.ShiftImm = ShiftImm;
+ Op->StartLoc = S;
+ Op->EndLoc = E;
+ return Op;
+ }
+
+ static ARMOperand *CreateShiftedImmediate(ARM_AM::ShiftOpc ShTy,
+ unsigned SrcReg,
+ unsigned ShiftImm,
+ SMLoc S, SMLoc E) {
+ ARMOperand *Op = new ARMOperand(k_ShiftedImmediate);
+ Op->RegShiftedImm.ShiftTy = ShTy;
+ Op->RegShiftedImm.SrcReg = SrcReg;
+ Op->RegShiftedImm.ShiftImm = ShiftImm;
Op->StartLoc = S;
Op->EndLoc = E;
return Op;
}
- static ARMOperand *CreateShifter(ARM_AM::ShiftOpc ShTy,
+ static ARMOperand *CreateShifterImm(bool isASR, unsigned Imm,
SMLoc S, SMLoc E) {
- ARMOperand *Op = new ARMOperand(Shifter);
- Op->Shift.ShiftTy = ShTy;
+ ARMOperand *Op = new ARMOperand(k_ShifterImmediate);
+ Op->ShifterImm.isASR = isASR;
+ Op->ShifterImm.Imm = Imm;
+ Op->StartLoc = S;
+ Op->EndLoc = E;
+ return Op;
+ }
+
+ static ARMOperand *CreateRotImm(unsigned Imm, SMLoc S, SMLoc E) {
+ ARMOperand *Op = new ARMOperand(k_RotateImmediate);
+ Op->RotImm.Imm = Imm;
+ Op->StartLoc = S;
+ Op->EndLoc = E;
+ return Op;
+ }
+
+ static ARMOperand *CreateBitfield(unsigned LSB, unsigned Width,
+ SMLoc S, SMLoc E) {
+ ARMOperand *Op = new ARMOperand(k_BitfieldDescriptor);
+ Op->Bitfield.LSB = LSB;
+ Op->Bitfield.Width = Width;
Op->StartLoc = S;
Op->EndLoc = E;
return Op;
@@ -829,12 +1561,13 @@ public:
static ARMOperand *
CreateRegList(const SmallVectorImpl<std::pair<unsigned, SMLoc> > &Regs,
SMLoc StartLoc, SMLoc EndLoc) {
- KindTy Kind = RegisterList;
+ KindTy Kind = k_RegisterList;
- if (ARM::DPRRegClass.contains(Regs.front().first))
- Kind = DPRRegisterList;
- else if (ARM::SPRRegClass.contains(Regs.front().first))
- Kind = SPRRegisterList;
+ if (ARMMCRegisterClasses[ARM::DPRRegClassID].contains(Regs.front().first))
+ Kind = k_DPRRegisterList;
+ else if (ARMMCRegisterClasses[ARM::SPRRegClassID].
+ contains(Regs.front().first))
+ Kind = k_SPRRegisterList;
ARMOperand *Op = new ARMOperand(Kind);
for (SmallVectorImpl<std::pair<unsigned, SMLoc> >::const_iterator
@@ -846,55 +1579,68 @@ public:
return Op;
}
+ static ARMOperand *CreateVectorIndex(unsigned Idx, SMLoc S, SMLoc E,
+ MCContext &Ctx) {
+ ARMOperand *Op = new ARMOperand(k_VectorIndex);
+ Op->VectorIndex.Val = Idx;
+ Op->StartLoc = S;
+ Op->EndLoc = E;
+ return Op;
+ }
+
static ARMOperand *CreateImm(const MCExpr *Val, SMLoc S, SMLoc E) {
- ARMOperand *Op = new ARMOperand(Immediate);
+ ARMOperand *Op = new ARMOperand(k_Immediate);
Op->Imm.Val = Val;
Op->StartLoc = S;
Op->EndLoc = E;
return Op;
}
- static ARMOperand *CreateMem(ARMII::AddrMode AddrMode, unsigned BaseRegNum,
- bool OffsetIsReg, const MCExpr *Offset,
- int OffsetRegNum, bool OffsetRegShifted,
- enum ARM_AM::ShiftOpc ShiftType,
- const MCExpr *ShiftAmount, bool Preindexed,
- bool Postindexed, bool Negative, bool Writeback,
+ static ARMOperand *CreateFPImm(unsigned Val, SMLoc S, MCContext &Ctx) {
+ ARMOperand *Op = new ARMOperand(k_FPImmediate);
+ Op->FPImm.Val = Val;
+ Op->StartLoc = S;
+ Op->EndLoc = S;
+ return Op;
+ }
+
+ static ARMOperand *CreateMem(unsigned BaseRegNum,
+ const MCConstantExpr *OffsetImm,
+ unsigned OffsetRegNum,
+ ARM_AM::ShiftOpc ShiftType,
+ unsigned ShiftImm,
+ unsigned Alignment,
+ bool isNegative,
SMLoc S, SMLoc E) {
- assert((OffsetRegNum == -1 || OffsetIsReg) &&
- "OffsetRegNum must imply OffsetIsReg!");
- assert((!OffsetRegShifted || OffsetIsReg) &&
- "OffsetRegShifted must imply OffsetIsReg!");
- assert((Offset || OffsetIsReg) &&
- "Offset must exists unless register offset is used!");
- assert((!ShiftAmount || (OffsetIsReg && OffsetRegShifted)) &&
- "Cannot have shift amount without shifted register offset!");
- assert((!Offset || !OffsetIsReg) &&
- "Cannot have expression offset and register offset!");
-
- ARMOperand *Op = new ARMOperand(Memory);
- Op->Mem.AddrMode = AddrMode;
- Op->Mem.BaseRegNum = BaseRegNum;
- Op->Mem.OffsetIsReg = OffsetIsReg;
- if (OffsetIsReg)
- Op->Mem.Offset.RegNum = OffsetRegNum;
- else
- Op->Mem.Offset.Value = Offset;
- Op->Mem.OffsetRegShifted = OffsetRegShifted;
- Op->Mem.ShiftType = ShiftType;
- Op->Mem.ShiftAmount = ShiftAmount;
- Op->Mem.Preindexed = Preindexed;
- Op->Mem.Postindexed = Postindexed;
- Op->Mem.Negative = Negative;
- Op->Mem.Writeback = Writeback;
+ ARMOperand *Op = new ARMOperand(k_Memory);
+ Op->Memory.BaseRegNum = BaseRegNum;
+ Op->Memory.OffsetImm = OffsetImm;
+ Op->Memory.OffsetRegNum = OffsetRegNum;
+ Op->Memory.ShiftType = ShiftType;
+ Op->Memory.ShiftImm = ShiftImm;
+ Op->Memory.Alignment = Alignment;
+ Op->Memory.isNegative = isNegative;
+ Op->StartLoc = S;
+ Op->EndLoc = E;
+ return Op;
+ }
+ static ARMOperand *CreatePostIdxReg(unsigned RegNum, bool isAdd,
+ ARM_AM::ShiftOpc ShiftTy,
+ unsigned ShiftImm,
+ SMLoc S, SMLoc E) {
+ ARMOperand *Op = new ARMOperand(k_PostIndexRegister);
+ Op->PostIdxReg.RegNum = RegNum;
+ Op->PostIdxReg.isAdd = isAdd;
+ Op->PostIdxReg.ShiftTy = ShiftTy;
+ Op->PostIdxReg.ShiftImm = ShiftImm;
Op->StartLoc = S;
Op->EndLoc = E;
return Op;
}
static ARMOperand *CreateMemBarrierOpt(ARM_MB::MemBOpt Opt, SMLoc S) {
- ARMOperand *Op = new ARMOperand(MemBarrierOpt);
+ ARMOperand *Op = new ARMOperand(k_MemBarrierOpt);
Op->MBOpt.Val = Opt;
Op->StartLoc = S;
Op->EndLoc = S;
@@ -902,7 +1648,7 @@ public:
}
static ARMOperand *CreateProcIFlags(ARM_PROC::IFlags IFlags, SMLoc S) {
- ARMOperand *Op = new ARMOperand(ProcIFlags);
+ ARMOperand *Op = new ARMOperand(k_ProcIFlags);
Op->IFlags.Val = IFlags;
Op->StartLoc = S;
Op->EndLoc = S;
@@ -910,7 +1656,7 @@ public:
}
static ARMOperand *CreateMSRMask(unsigned MMask, SMLoc S) {
- ARMOperand *Op = new ARMOperand(MSRMask);
+ ARMOperand *Op = new ARMOperand(k_MSRMask);
Op->MMask.Val = MMask;
Op->StartLoc = S;
Op->EndLoc = S;
@@ -922,53 +1668,56 @@ public:
void ARMOperand::print(raw_ostream &OS) const {
switch (Kind) {
- case CondCode:
+ case k_FPImmediate:
+ OS << "<fpimm " << getFPImm() << "(" << ARM_AM::getFPImmFloat(getFPImm())
+ << ") >";
+ break;
+ case k_CondCode:
OS << "<ARMCC::" << ARMCondCodeToString(getCondCode()) << ">";
break;
- case CCOut:
+ case k_CCOut:
OS << "<ccout " << getReg() << ">";
break;
- case CoprocNum:
+ case k_ITCondMask: {
+ static char MaskStr[][6] = { "()", "(t)", "(e)", "(tt)", "(et)", "(te)",
+ "(ee)", "(ttt)", "(ett)", "(tet)", "(eet)", "(tte)", "(ete)",
+ "(tee)", "(eee)" };
+ assert((ITMask.Mask & 0xf) == ITMask.Mask);
+ OS << "<it-mask " << MaskStr[ITMask.Mask] << ">";
+ break;
+ }
+ case k_CoprocNum:
OS << "<coprocessor number: " << getCoproc() << ">";
break;
- case CoprocReg:
+ case k_CoprocReg:
OS << "<coprocessor register: " << getCoproc() << ">";
break;
- case MSRMask:
+ case k_CoprocOption:
+ OS << "<coprocessor option: " << CoprocOption.Val << ">";
+ break;
+ case k_MSRMask:
OS << "<mask: " << getMSRMask() << ">";
break;
- case Immediate:
+ case k_Immediate:
getImm()->print(OS);
break;
- case MemBarrierOpt:
+ case k_MemBarrierOpt:
OS << "<ARM_MB::" << MemBOptToString(getMemBarrierOpt()) << ">";
break;
- case Memory:
+ case k_Memory:
OS << "<memory "
- << "am:" << ARMII::AddrModeToString(getMemAddrMode())
- << " base:" << getMemBaseRegNum();
- if (getMemOffsetIsReg()) {
- OS << " offset:<register " << getMemOffsetRegNum();
- if (getMemOffsetRegShifted()) {
- OS << " offset-shift-type:" << getMemShiftType();
- OS << " offset-shift-amount:" << *getMemShiftAmount();
- }
- } else {
- OS << " offset:" << *getMemOffset();
- }
- if (getMemOffsetIsReg())
- OS << " (offset-is-reg)";
- if (getMemPreindexed())
- OS << " (pre-indexed)";
- if (getMemPostindexed())
- OS << " (post-indexed)";
- if (getMemNegative())
- OS << " (negative)";
- if (getMemWriteback())
- OS << " (writeback)";
+ << " base:" << Memory.BaseRegNum;
+ OS << ">";
+ break;
+ case k_PostIndexRegister:
+ OS << "post-idx register " << (PostIdxReg.isAdd ? "" : "-")
+ << PostIdxReg.RegNum;
+ if (PostIdxReg.ShiftTy != ARM_AM::no_shift)
+ OS << ARM_AM::getShiftOpcStr(PostIdxReg.ShiftTy) << " "
+ << PostIdxReg.ShiftImm;
OS << ">";
break;
- case ProcIFlags: {
+ case k_ProcIFlags: {
OS << "<ARM_PROC::";
unsigned IFlags = getProcIFlags();
for (int i=2; i >= 0; --i)
@@ -977,23 +1726,38 @@ void ARMOperand::print(raw_ostream &OS) const {
OS << ">";
break;
}
- case Register:
+ case k_Register:
OS << "<register " << getReg() << ">";
break;
- case Shifter:
- OS << "<shifter " << ARM_AM::getShiftOpcStr(Shift.ShiftTy) << ">";
+ case k_ShifterImmediate:
+ OS << "<shift " << (ShifterImm.isASR ? "asr" : "lsl")
+ << " #" << ShifterImm.Imm << ">";
+ break;
+ case k_ShiftedRegister:
+ OS << "<so_reg_reg "
+ << RegShiftedReg.SrcReg
+ << ARM_AM::getShiftOpcStr(ARM_AM::getSORegShOp(RegShiftedReg.ShiftImm))
+ << ", " << RegShiftedReg.ShiftReg << ", "
+ << ARM_AM::getSORegOffset(RegShiftedReg.ShiftImm)
+ << ">";
break;
- case ShiftedRegister:
- OS << "<so_reg"
- << ShiftedReg.SrcReg
- << ARM_AM::getShiftOpcStr(ARM_AM::getSORegShOp(ShiftedReg.ShiftImm))
- << ", " << ShiftedReg.ShiftReg << ", "
- << ARM_AM::getSORegOffset(ShiftedReg.ShiftImm)
+ case k_ShiftedImmediate:
+ OS << "<so_reg_imm "
+ << RegShiftedImm.SrcReg
+ << ARM_AM::getShiftOpcStr(ARM_AM::getSORegShOp(RegShiftedImm.ShiftImm))
+ << ", " << ARM_AM::getSORegOffset(RegShiftedImm.ShiftImm)
<< ">";
break;
- case RegisterList:
- case DPRRegisterList:
- case SPRRegisterList: {
+ case k_RotateImmediate:
+ OS << "<ror " << " #" << (RotImm.Imm * 8) << ">";
+ break;
+ case k_BitfieldDescriptor:
+ OS << "<bitfield " << "lsb: " << Bitfield.LSB
+ << ", width: " << Bitfield.Width << ">";
+ break;
+ case k_RegisterList:
+ case k_DPRRegisterList:
+ case k_SPRRegisterList: {
OS << "<register_list ";
const SmallVectorImpl<unsigned> &RegList = getRegList();
@@ -1006,9 +1770,12 @@ void ARMOperand::print(raw_ostream &OS) const {
OS << ">";
break;
}
- case Token:
+ case k_Token:
OS << "'" << getToken() << "'";
break;
+ case k_VectorIndex:
+ OS << "<vectorindex " << getVectorIndex() << ">";
+ break;
}
}
@@ -1021,7 +1788,7 @@ static unsigned MatchRegisterName(StringRef Name);
bool ARMAsmParser::ParseRegister(unsigned &RegNo,
SMLoc &StartLoc, SMLoc &EndLoc) {
- RegNo = TryParseRegister();
+ RegNo = tryParseRegister();
return (RegNo == (unsigned)-1);
}
@@ -1030,9 +1797,9 @@ bool ARMAsmParser::ParseRegister(unsigned &RegNo,
/// and if it is a register name the token is eaten and the register number is
/// returned. Otherwise return -1.
///
-int ARMAsmParser::TryParseRegister() {
+int ARMAsmParser::tryParseRegister() {
const AsmToken &Tok = Parser.getTok();
- assert(Tok.is(AsmToken::Identifier) && "Token is not an Identifier");
+ if (Tok.isNot(AsmToken::Identifier)) return -1;
// FIXME: Validate register for the current architecture; we have to do
// validation later, so maybe there is no need for this here.
@@ -1050,6 +1817,39 @@ int ARMAsmParser::TryParseRegister() {
if (!RegNum) return -1;
Parser.Lex(); // Eat identifier token.
+
+#if 0
+ // Also check for an index operand. This is only legal for vector registers,
+ // but that'll get caught OK in operand matching, so we don't need to
+ // explicitly filter everything else out here.
+ if (Parser.getTok().is(AsmToken::LBrac)) {
+ SMLoc SIdx = Parser.getTok().getLoc();
+ Parser.Lex(); // Eat left bracket token.
+
+ const MCExpr *ImmVal;
+ SMLoc ExprLoc = Parser.getTok().getLoc();
+ if (getParser().ParseExpression(ImmVal))
+ return MatchOperand_ParseFail;
+ const MCConstantExpr *MCE = dyn_cast<MCConstantExpr>(ImmVal);
+ if (!MCE) {
+ TokError("immediate value expected for vector index");
+ return MatchOperand_ParseFail;
+ }
+
+ SMLoc E = Parser.getTok().getLoc();
+ if (Parser.getTok().isNot(AsmToken::RBrac)) {
+ Error(E, "']' expected");
+ return MatchOperand_ParseFail;
+ }
+
+ Parser.Lex(); // Eat right bracket token.
+
+ Operands.push_back(ARMOperand::CreateVectorIndex(MCE->getValue(),
+ SIdx, E,
+ getContext()));
+ }
+#endif
+
return RegNum;
}
@@ -1058,7 +1858,7 @@ int ARMAsmParser::TryParseRegister() {
// occurs, return -1. An irrecoverable error is one where tokens have been
// consumed in the process of trying to parse the shifter (i.e., when it is
// indeed a shifter operand, but malformed).
-int ARMAsmParser::TryParseShiftRegister(
+int ARMAsmParser::tryParseShiftRegister(
SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
SMLoc S = Parser.getTok().getLoc();
const AsmToken &Tok = Parser.getTok();
@@ -1120,7 +1920,7 @@ int ARMAsmParser::TryParseShiftRegister(
return -1;
}
} else if (Parser.getTok().is(AsmToken::Identifier)) {
- ShiftReg = TryParseRegister();
+ ShiftReg = tryParseRegister();
SMLoc L = Parser.getTok().getLoc();
if (ShiftReg == -1) {
Error (L, "expected immediate or register in shift operand");
@@ -1133,8 +1933,12 @@ int ARMAsmParser::TryParseShiftRegister(
}
}
- Operands.push_back(ARMOperand::CreateShiftedRegister(ShiftTy, SrcReg,
- ShiftReg, Imm,
+ if (ShiftReg && ShiftTy != ARM_AM::rrx)
+ Operands.push_back(ARMOperand::CreateShiftedRegister(ShiftTy, SrcReg,
+ ShiftReg, Imm,
+ S, Parser.getTok().getLoc()));
+ else
+ Operands.push_back(ARMOperand::CreateShiftedImmediate(ShiftTy, SrcReg, Imm,
S, Parser.getTok().getLoc()));
return 0;
@@ -1148,9 +1952,9 @@ int ARMAsmParser::TryParseShiftRegister(
/// TODO this is likely to change to allow different register types and or to
/// parse for a specific register type.
bool ARMAsmParser::
-TryParseRegisterWithWriteBack(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
+tryParseRegisterWithWriteBack(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
SMLoc S = Parser.getTok().getLoc();
- int RegNo = TryParseRegister();
+ int RegNo = tryParseRegister();
if (RegNo == -1)
return true;
@@ -1161,6 +1965,37 @@ TryParseRegisterWithWriteBack(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
Operands.push_back(ARMOperand::CreateToken(ExclaimTok.getString(),
ExclaimTok.getLoc()));
Parser.Lex(); // Eat exclaim token
+ return false;
+ }
+
+ // Also check for an index operand. This is only legal for vector registers,
+ // but that'll get caught OK in operand matching, so we don't need to
+ // explicitly filter everything else out here.
+ if (Parser.getTok().is(AsmToken::LBrac)) {
+ SMLoc SIdx = Parser.getTok().getLoc();
+ Parser.Lex(); // Eat left bracket token.
+
+ const MCExpr *ImmVal;
+ SMLoc ExprLoc = Parser.getTok().getLoc();
+ if (getParser().ParseExpression(ImmVal))
+ return MatchOperand_ParseFail;
+ const MCConstantExpr *MCE = dyn_cast<MCConstantExpr>(ImmVal);
+ if (!MCE) {
+ TokError("immediate value expected for vector index");
+ return MatchOperand_ParseFail;
+ }
+
+ SMLoc E = Parser.getTok().getLoc();
+ if (Parser.getTok().isNot(AsmToken::RBrac)) {
+ Error(E, "']' expected");
+ return MatchOperand_ParseFail;
+ }
+
+ Parser.Lex(); // Eat right bracket token.
+
+ Operands.push_back(ARMOperand::CreateVectorIndex(MCE->getValue(),
+ SIdx, E,
+ getContext()));
}
return false;
@@ -1209,14 +2044,50 @@ static int MatchCoprocessorOperandName(StringRef Name, char CoprocOp) {
return -1;
}
-/// tryParseCoprocNumOperand - Try to parse an coprocessor number operand. The
+/// parseITCondCode - Try to parse a condition code for an IT instruction.
+ARMAsmParser::OperandMatchResultTy ARMAsmParser::
+parseITCondCode(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
+ SMLoc S = Parser.getTok().getLoc();
+ const AsmToken &Tok = Parser.getTok();
+ if (!Tok.is(AsmToken::Identifier))
+ return MatchOperand_NoMatch;
+ unsigned CC = StringSwitch<unsigned>(Tok.getString())
+ .Case("eq", ARMCC::EQ)
+ .Case("ne", ARMCC::NE)
+ .Case("hs", ARMCC::HS)
+ .Case("cs", ARMCC::HS)
+ .Case("lo", ARMCC::LO)
+ .Case("cc", ARMCC::LO)
+ .Case("mi", ARMCC::MI)
+ .Case("pl", ARMCC::PL)
+ .Case("vs", ARMCC::VS)
+ .Case("vc", ARMCC::VC)
+ .Case("hi", ARMCC::HI)
+ .Case("ls", ARMCC::LS)
+ .Case("ge", ARMCC::GE)
+ .Case("lt", ARMCC::LT)
+ .Case("gt", ARMCC::GT)
+ .Case("le", ARMCC::LE)
+ .Case("al", ARMCC::AL)
+ .Default(~0U);
+ if (CC == ~0U)
+ return MatchOperand_NoMatch;
+ Parser.Lex(); // Eat the token.
+
+ Operands.push_back(ARMOperand::CreateCondCode(ARMCC::CondCodes(CC), S));
+
+ return MatchOperand_Success;
+}
+
+/// parseCoprocNumOperand - Try to parse an coprocessor number operand. The
/// token must be an Identifier when called, and if it is a coprocessor
/// number, the token is eaten and the operand is added to the operand list.
ARMAsmParser::OperandMatchResultTy ARMAsmParser::
-tryParseCoprocNumOperand(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
+parseCoprocNumOperand(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
SMLoc S = Parser.getTok().getLoc();
const AsmToken &Tok = Parser.getTok();
- assert(Tok.is(AsmToken::Identifier) && "Token is not an Identifier");
+ if (Tok.isNot(AsmToken::Identifier))
+ return MatchOperand_NoMatch;
int Num = MatchCoprocessorOperandName(Tok.getString(), 'p');
if (Num == -1)
@@ -1227,14 +2098,15 @@ tryParseCoprocNumOperand(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
return MatchOperand_Success;
}
-/// tryParseCoprocRegOperand - Try to parse an coprocessor register operand. The
+/// parseCoprocRegOperand - Try to parse an coprocessor register operand. The
/// token must be an Identifier when called, and if it is a coprocessor
/// number, the token is eaten and the operand is added to the operand list.
ARMAsmParser::OperandMatchResultTy ARMAsmParser::
-tryParseCoprocRegOperand(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
+parseCoprocRegOperand(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
SMLoc S = Parser.getTok().getLoc();
const AsmToken &Tok = Parser.getTok();
- assert(Tok.is(AsmToken::Identifier) && "Token is not an Identifier");
+ if (Tok.isNot(AsmToken::Identifier))
+ return MatchOperand_NoMatch;
int Reg = MatchCoprocessorOperandName(Tok.getString(), 'c');
if (Reg == -1)
@@ -1245,93 +2117,155 @@ tryParseCoprocRegOperand(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
return MatchOperand_Success;
}
-/// Parse a register list, return it if successful else return null. The first
-/// token must be a '{' when called.
-bool ARMAsmParser::
-ParseRegisterList(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
- assert(Parser.getTok().is(AsmToken::LCurly) &&
- "Token is not a Left Curly Brace");
+/// parseCoprocOptionOperand - Try to parse an coprocessor option operand.
+/// coproc_option : '{' imm0_255 '}'
+ARMAsmParser::OperandMatchResultTy ARMAsmParser::
+parseCoprocOptionOperand(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
SMLoc S = Parser.getTok().getLoc();
- // Read the rest of the registers in the list.
- unsigned PrevRegNum = 0;
- SmallVector<std::pair<unsigned, SMLoc>, 32> Registers;
-
- do {
- bool IsRange = Parser.getTok().is(AsmToken::Minus);
- Parser.Lex(); // Eat non-identifier token.
-
- const AsmToken &RegTok = Parser.getTok();
- SMLoc RegLoc = RegTok.getLoc();
- if (RegTok.isNot(AsmToken::Identifier)) {
- Error(RegLoc, "register expected");
- return true;
- }
-
- int RegNum = TryParseRegister();
- if (RegNum == -1) {
- Error(RegLoc, "register expected");
- return true;
- }
-
- if (IsRange) {
- int Reg = PrevRegNum;
- do {
- ++Reg;
- Registers.push_back(std::make_pair(Reg, RegLoc));
- } while (Reg != RegNum);
- } else {
- Registers.push_back(std::make_pair(RegNum, RegLoc));
- }
-
- PrevRegNum = RegNum;
- } while (Parser.getTok().is(AsmToken::Comma) ||
- Parser.getTok().is(AsmToken::Minus));
+ // If this isn't a '{', this isn't a coprocessor immediate operand.
+ if (Parser.getTok().isNot(AsmToken::LCurly))
+ return MatchOperand_NoMatch;
+ Parser.Lex(); // Eat the '{'
- // Process the right curly brace of the list.
- const AsmToken &RCurlyTok = Parser.getTok();
- if (RCurlyTok.isNot(AsmToken::RCurly)) {
- Error(RCurlyTok.getLoc(), "'}' expected");
- return true;
+ const MCExpr *Expr;
+ SMLoc Loc = Parser.getTok().getLoc();
+ if (getParser().ParseExpression(Expr)) {
+ Error(Loc, "illegal expression");
+ return MatchOperand_ParseFail;
}
+ const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(Expr);
+ if (!CE || CE->getValue() < 0 || CE->getValue() > 255) {
+ Error(Loc, "coprocessor option must be an immediate in range [0, 255]");
+ return MatchOperand_ParseFail;
+ }
+ int Val = CE->getValue();
- SMLoc E = RCurlyTok.getLoc();
- Parser.Lex(); // Eat right curly brace token.
-
- // Verify the register list.
- SmallVectorImpl<std::pair<unsigned, SMLoc> >::const_iterator
- RI = Registers.begin(), RE = Registers.end();
+ // Check for and consume the closing '}'
+ if (Parser.getTok().isNot(AsmToken::RCurly))
+ return MatchOperand_ParseFail;
+ SMLoc E = Parser.getTok().getLoc();
+ Parser.Lex(); // Eat the '}'
- unsigned HighRegNum = getARMRegisterNumbering(RI->first);
- bool EmittedWarning = false;
+ Operands.push_back(ARMOperand::CreateCoprocOption(Val, S, E));
+ return MatchOperand_Success;
+}
- DenseMap<unsigned, bool> RegMap;
- RegMap[HighRegNum] = true;
+// For register list parsing, we need to map from raw GPR register numbering
+// to the enumeration values. The enumeration values aren't sorted by
+// register number due to our using "sp", "lr" and "pc" as canonical names.
+static unsigned getNextRegister(unsigned Reg) {
+ // If this is a GPR, we need to do it manually, otherwise we can rely
+ // on the sort ordering of the enumeration since the other reg-classes
+ // are sane.
+ if (!ARMMCRegisterClasses[ARM::GPRRegClassID].contains(Reg))
+ return Reg + 1;
+ switch(Reg) {
+ default: assert(0 && "Invalid GPR number!");
+ case ARM::R0: return ARM::R1; case ARM::R1: return ARM::R2;
+ case ARM::R2: return ARM::R3; case ARM::R3: return ARM::R4;
+ case ARM::R4: return ARM::R5; case ARM::R5: return ARM::R6;
+ case ARM::R6: return ARM::R7; case ARM::R7: return ARM::R8;
+ case ARM::R8: return ARM::R9; case ARM::R9: return ARM::R10;
+ case ARM::R10: return ARM::R11; case ARM::R11: return ARM::R12;
+ case ARM::R12: return ARM::SP; case ARM::SP: return ARM::LR;
+ case ARM::LR: return ARM::PC; case ARM::PC: return ARM::R0;
+ }
+}
- for (++RI; RI != RE; ++RI) {
- const std::pair<unsigned, SMLoc> &RegInfo = *RI;
- unsigned Reg = getARMRegisterNumbering(RegInfo.first);
+/// Parse a register list.
+bool ARMAsmParser::
+parseRegisterList(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
+ assert(Parser.getTok().is(AsmToken::LCurly) &&
+ "Token is not a Left Curly Brace");
+ SMLoc S = Parser.getTok().getLoc();
+ Parser.Lex(); // Eat '{' token.
+ SMLoc RegLoc = Parser.getTok().getLoc();
- if (RegMap[Reg]) {
- Error(RegInfo.second, "register duplicated in register list");
- return true;
+ // Check the first register in the list to see what register class
+ // this is a list of.
+ int Reg = tryParseRegister();
+ if (Reg == -1)
+ return Error(RegLoc, "register expected");
+
+ MCRegisterClass *RC;
+ if (ARMMCRegisterClasses[ARM::GPRRegClassID].contains(Reg))
+ RC = &ARMMCRegisterClasses[ARM::GPRRegClassID];
+ else if (ARMMCRegisterClasses[ARM::DPRRegClassID].contains(Reg))
+ RC = &ARMMCRegisterClasses[ARM::DPRRegClassID];
+ else if (ARMMCRegisterClasses[ARM::SPRRegClassID].contains(Reg))
+ RC = &ARMMCRegisterClasses[ARM::SPRRegClassID];
+ else
+ return Error(RegLoc, "invalid register in register list");
+
+ // The reglist instructions have at most 16 registers, so reserve
+ // space for that many.
+ SmallVector<std::pair<unsigned, SMLoc>, 16> Registers;
+ // Store the first register.
+ Registers.push_back(std::pair<unsigned, SMLoc>(Reg, RegLoc));
+
+ // This starts immediately after the first register token in the list,
+ // so we can see either a comma or a minus (range separator) as a legal
+ // next token.
+ while (Parser.getTok().is(AsmToken::Comma) ||
+ Parser.getTok().is(AsmToken::Minus)) {
+ if (Parser.getTok().is(AsmToken::Minus)) {
+ Parser.Lex(); // Eat the comma.
+ SMLoc EndLoc = Parser.getTok().getLoc();
+ int EndReg = tryParseRegister();
+ if (EndReg == -1)
+ return Error(EndLoc, "register expected");
+ // If the register is the same as the start reg, there's nothing
+ // more to do.
+ if (Reg == EndReg)
+ continue;
+ // The register must be in the same register class as the first.
+ if (!RC->contains(EndReg))
+ return Error(EndLoc, "invalid register in register list");
+ // Ranges must go from low to high.
+ if (getARMRegisterNumbering(Reg) > getARMRegisterNumbering(EndReg))
+ return Error(EndLoc, "bad range in register list");
+
+ // Add all the registers in the range to the register list.
+ while (Reg != EndReg) {
+ Reg = getNextRegister(Reg);
+ Registers.push_back(std::pair<unsigned, SMLoc>(Reg, RegLoc));
+ }
+ continue;
}
-
- if (!EmittedWarning && Reg < HighRegNum)
- Warning(RegInfo.second,
- "register not in ascending order in register list");
-
- RegMap[Reg] = true;
- HighRegNum = std::max(Reg, HighRegNum);
+ Parser.Lex(); // Eat the comma.
+ RegLoc = Parser.getTok().getLoc();
+ int OldReg = Reg;
+ Reg = tryParseRegister();
+ if (Reg == -1)
+ return Error(RegLoc, "register expected");
+ // The register must be in the same register class as the first.
+ if (!RC->contains(Reg))
+ return Error(RegLoc, "invalid register in register list");
+ // List must be monotonically increasing.
+ if (getARMRegisterNumbering(Reg) <= getARMRegisterNumbering(OldReg))
+ return Error(RegLoc, "register list not in ascending order");
+ // VFP register lists must also be contiguous.
+ // It's OK to use the enumeration values directly here rather, as the
+ // VFP register classes have the enum sorted properly.
+ if (RC != &ARMMCRegisterClasses[ARM::GPRRegClassID] &&
+ Reg != OldReg + 1)
+ return Error(RegLoc, "non-contiguous register range");
+ Registers.push_back(std::pair<unsigned, SMLoc>(Reg, RegLoc));
}
+ SMLoc E = Parser.getTok().getLoc();
+ if (Parser.getTok().isNot(AsmToken::RCurly))
+ return Error(E, "'}' expected");
+ Parser.Lex(); // Eat '}' token.
+
Operands.push_back(ARMOperand::CreateRegList(Registers, S, E));
return false;
}
-/// tryParseMemBarrierOptOperand - Try to parse DSB/DMB data barrier options.
+/// parseMemBarrierOptOperand - Try to parse DSB/DMB data barrier options.
ARMAsmParser::OperandMatchResultTy ARMAsmParser::
-tryParseMemBarrierOptOperand(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
+parseMemBarrierOptOperand(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
SMLoc S = Parser.getTok().getLoc();
const AsmToken &Tok = Parser.getTok();
assert(Tok.is(AsmToken::Identifier) && "Token is not an Identifier");
@@ -1360,28 +2294,32 @@ tryParseMemBarrierOptOperand(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
return MatchOperand_Success;
}
-/// tryParseProcIFlagsOperand - Try to parse iflags from CPS instruction.
+/// parseProcIFlagsOperand - Try to parse iflags from CPS instruction.
ARMAsmParser::OperandMatchResultTy ARMAsmParser::
-tryParseProcIFlagsOperand(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
+parseProcIFlagsOperand(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
SMLoc S = Parser.getTok().getLoc();
const AsmToken &Tok = Parser.getTok();
assert(Tok.is(AsmToken::Identifier) && "Token is not an Identifier");
StringRef IFlagsStr = Tok.getString();
+ // An iflags string of "none" is interpreted to mean that none of the AIF
+ // bits are set. Not a terribly useful instruction, but a valid encoding.
unsigned IFlags = 0;
- for (int i = 0, e = IFlagsStr.size(); i != e; ++i) {
- unsigned Flag = StringSwitch<unsigned>(IFlagsStr.substr(i, 1))
- .Case("a", ARM_PROC::A)
- .Case("i", ARM_PROC::I)
- .Case("f", ARM_PROC::F)
- .Default(~0U);
-
- // If some specific iflag is already set, it means that some letter is
- // present more than once, this is not acceptable.
- if (Flag == ~0U || (IFlags & Flag))
- return MatchOperand_NoMatch;
+ if (IFlagsStr != "none") {
+ for (int i = 0, e = IFlagsStr.size(); i != e; ++i) {
+ unsigned Flag = StringSwitch<unsigned>(IFlagsStr.substr(i, 1))
+ .Case("a", ARM_PROC::A)
+ .Case("i", ARM_PROC::I)
+ .Case("f", ARM_PROC::F)
+ .Default(~0U);
+
+ // If some specific iflag is already set, it means that some letter is
+ // present more than once, this is not acceptable.
+ if (Flag == ~0U || (IFlags & Flag))
+ return MatchOperand_NoMatch;
- IFlags |= Flag;
+ IFlags |= Flag;
+ }
}
Parser.Lex(); // Eat identifier token.
@@ -1389,18 +2327,49 @@ tryParseProcIFlagsOperand(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
return MatchOperand_Success;
}
-/// tryParseMSRMaskOperand - Try to parse mask flags from MSR instruction.
+/// parseMSRMaskOperand - Try to parse mask flags from MSR instruction.
ARMAsmParser::OperandMatchResultTy ARMAsmParser::
-tryParseMSRMaskOperand(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
+parseMSRMaskOperand(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
SMLoc S = Parser.getTok().getLoc();
const AsmToken &Tok = Parser.getTok();
assert(Tok.is(AsmToken::Identifier) && "Token is not an Identifier");
StringRef Mask = Tok.getString();
+ if (isMClass()) {
+ // See ARMv6-M 10.1.1
+ unsigned FlagsVal = StringSwitch<unsigned>(Mask)
+ .Case("apsr", 0)
+ .Case("iapsr", 1)
+ .Case("eapsr", 2)
+ .Case("xpsr", 3)
+ .Case("ipsr", 5)
+ .Case("epsr", 6)
+ .Case("iepsr", 7)
+ .Case("msp", 8)
+ .Case("psp", 9)
+ .Case("primask", 16)
+ .Case("basepri", 17)
+ .Case("basepri_max", 18)
+ .Case("faultmask", 19)
+ .Case("control", 20)
+ .Default(~0U);
+
+ if (FlagsVal == ~0U)
+ return MatchOperand_NoMatch;
+
+ if (!hasV7Ops() && FlagsVal >= 17 && FlagsVal <= 19)
+ // basepri, basepri_max and faultmask only valid for V7m.
+ return MatchOperand_NoMatch;
+
+ Parser.Lex(); // Eat identifier token.
+ Operands.push_back(ARMOperand::CreateMSRMask(FlagsVal, S));
+ return MatchOperand_Success;
+ }
+
// Split spec_reg from flag, example: CPSR_sxf => "CPSR" and "sxf"
size_t Start = 0, Next = Mask.find('_');
StringRef Flags = "";
- StringRef SpecReg = Mask.slice(Start, Next);
+ std::string SpecReg = LowercaseString(Mask.slice(Start, Next));
if (Next != StringRef::npos)
Flags = Mask.slice(Next+1, Mask.size());
@@ -1411,7 +2380,7 @@ tryParseMSRMaskOperand(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
if (SpecReg == "apsr") {
FlagsVal = StringSwitch<unsigned>(Flags)
- .Case("nzcvq", 0x8) // same as CPSR_c
+ .Case("nzcvq", 0x8) // same as CPSR_f
.Case("g", 0x4) // same as CPSR_s
.Case("nzcvqg", 0xc) // same as CPSR_fs
.Default(~0U);
@@ -1420,7 +2389,7 @@ tryParseMSRMaskOperand(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
if (!Flags.empty())
return MatchOperand_NoMatch;
else
- FlagsVal = 0; // No flag
+ FlagsVal = 8; // No flag
}
} else if (SpecReg == "cpsr" || SpecReg == "spsr") {
if (Flags == "all") // cpsr_all is an alias for cpsr_fc
@@ -1455,96 +2424,680 @@ tryParseMSRMaskOperand(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
return MatchOperand_Success;
}
-/// tryParseMemMode2Operand - Try to parse memory addressing mode 2 operand.
ARMAsmParser::OperandMatchResultTy ARMAsmParser::
-tryParseMemMode2Operand(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
- assert(Parser.getTok().is(AsmToken::LBrac) && "Token is not a \"[\"");
+parsePKHImm(SmallVectorImpl<MCParsedAsmOperand*> &Operands, StringRef Op,
+ int Low, int High) {
+ const AsmToken &Tok = Parser.getTok();
+ if (Tok.isNot(AsmToken::Identifier)) {
+ Error(Parser.getTok().getLoc(), Op + " operand expected.");
+ return MatchOperand_ParseFail;
+ }
+ StringRef ShiftName = Tok.getString();
+ std::string LowerOp = LowercaseString(Op);
+ std::string UpperOp = UppercaseString(Op);
+ if (ShiftName != LowerOp && ShiftName != UpperOp) {
+ Error(Parser.getTok().getLoc(), Op + " operand expected.");
+ return MatchOperand_ParseFail;
+ }
+ Parser.Lex(); // Eat shift type token.
- if (ParseMemory(Operands, ARMII::AddrMode2))
- return MatchOperand_NoMatch;
+ // There must be a '#' and a shift amount.
+ if (Parser.getTok().isNot(AsmToken::Hash)) {
+ Error(Parser.getTok().getLoc(), "'#' expected");
+ return MatchOperand_ParseFail;
+ }
+ Parser.Lex(); // Eat hash token.
+
+ const MCExpr *ShiftAmount;
+ SMLoc Loc = Parser.getTok().getLoc();
+ if (getParser().ParseExpression(ShiftAmount)) {
+ Error(Loc, "illegal expression");
+ return MatchOperand_ParseFail;
+ }
+ const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(ShiftAmount);
+ if (!CE) {
+ Error(Loc, "constant expression expected");
+ return MatchOperand_ParseFail;
+ }
+ int Val = CE->getValue();
+ if (Val < Low || Val > High) {
+ Error(Loc, "immediate value out of range");
+ return MatchOperand_ParseFail;
+ }
+
+ Operands.push_back(ARMOperand::CreateImm(CE, Loc, Parser.getTok().getLoc()));
return MatchOperand_Success;
}
-/// tryParseMemMode3Operand - Try to parse memory addressing mode 3 operand.
ARMAsmParser::OperandMatchResultTy ARMAsmParser::
-tryParseMemMode3Operand(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
- assert(Parser.getTok().is(AsmToken::LBrac) && "Token is not a \"[\"");
+parseSetEndImm(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
+ const AsmToken &Tok = Parser.getTok();
+ SMLoc S = Tok.getLoc();
+ if (Tok.isNot(AsmToken::Identifier)) {
+ Error(Tok.getLoc(), "'be' or 'le' operand expected");
+ return MatchOperand_ParseFail;
+ }
+ int Val = StringSwitch<int>(Tok.getString())
+ .Case("be", 1)
+ .Case("le", 0)
+ .Default(-1);
+ Parser.Lex(); // Eat the token.
+
+ if (Val == -1) {
+ Error(Tok.getLoc(), "'be' or 'le' operand expected");
+ return MatchOperand_ParseFail;
+ }
+ Operands.push_back(ARMOperand::CreateImm(MCConstantExpr::Create(Val,
+ getContext()),
+ S, Parser.getTok().getLoc()));
+ return MatchOperand_Success;
+}
+
+/// parseShifterImm - Parse the shifter immediate operand for SSAT/USAT
+/// instructions. Legal values are:
+/// lsl #n 'n' in [0,31]
+/// asr #n 'n' in [1,32]
+/// n == 32 encoded as n == 0.
+ARMAsmParser::OperandMatchResultTy ARMAsmParser::
+parseShifterImm(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
+ const AsmToken &Tok = Parser.getTok();
+ SMLoc S = Tok.getLoc();
+ if (Tok.isNot(AsmToken::Identifier)) {
+ Error(S, "shift operator 'asr' or 'lsl' expected");
+ return MatchOperand_ParseFail;
+ }
+ StringRef ShiftName = Tok.getString();
+ bool isASR;
+ if (ShiftName == "lsl" || ShiftName == "LSL")
+ isASR = false;
+ else if (ShiftName == "asr" || ShiftName == "ASR")
+ isASR = true;
+ else {
+ Error(S, "shift operator 'asr' or 'lsl' expected");
+ return MatchOperand_ParseFail;
+ }
+ Parser.Lex(); // Eat the operator.
+
+ // A '#' and a shift amount.
+ if (Parser.getTok().isNot(AsmToken::Hash)) {
+ Error(Parser.getTok().getLoc(), "'#' expected");
+ return MatchOperand_ParseFail;
+ }
+ Parser.Lex(); // Eat hash token.
+
+ const MCExpr *ShiftAmount;
+ SMLoc E = Parser.getTok().getLoc();
+ if (getParser().ParseExpression(ShiftAmount)) {
+ Error(E, "malformed shift expression");
+ return MatchOperand_ParseFail;
+ }
+ const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(ShiftAmount);
+ if (!CE) {
+ Error(E, "shift amount must be an immediate");
+ return MatchOperand_ParseFail;
+ }
- if (ParseMemory(Operands, ARMII::AddrMode3))
+ int64_t Val = CE->getValue();
+ if (isASR) {
+ // Shift amount must be in [1,32]
+ if (Val < 1 || Val > 32) {
+ Error(E, "'asr' shift amount must be in range [1,32]");
+ return MatchOperand_ParseFail;
+ }
+ // asr #32 encoded as asr #0, but is not allowed in Thumb2 mode.
+ if (isThumb() && Val == 32) {
+ Error(E, "'asr #32' shift amount not allowed in Thumb mode");
+ return MatchOperand_ParseFail;
+ }
+ if (Val == 32) Val = 0;
+ } else {
+ // Shift amount must be in [1,32]
+ if (Val < 0 || Val > 31) {
+ Error(E, "'lsr' shift amount must be in range [0,31]");
+ return MatchOperand_ParseFail;
+ }
+ }
+
+ E = Parser.getTok().getLoc();
+ Operands.push_back(ARMOperand::CreateShifterImm(isASR, Val, S, E));
+
+ return MatchOperand_Success;
+}
+
+/// parseRotImm - Parse the shifter immediate operand for SXTB/UXTB family
+/// of instructions. Legal values are:
+/// ror #n 'n' in {0, 8, 16, 24}
+ARMAsmParser::OperandMatchResultTy ARMAsmParser::
+parseRotImm(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
+ const AsmToken &Tok = Parser.getTok();
+ SMLoc S = Tok.getLoc();
+ if (Tok.isNot(AsmToken::Identifier))
+ return MatchOperand_NoMatch;
+ StringRef ShiftName = Tok.getString();
+ if (ShiftName != "ror" && ShiftName != "ROR")
return MatchOperand_NoMatch;
+ Parser.Lex(); // Eat the operator.
+
+ // A '#' and a rotate amount.
+ if (Parser.getTok().isNot(AsmToken::Hash)) {
+ Error(Parser.getTok().getLoc(), "'#' expected");
+ return MatchOperand_ParseFail;
+ }
+ Parser.Lex(); // Eat hash token.
+
+ const MCExpr *ShiftAmount;
+ SMLoc E = Parser.getTok().getLoc();
+ if (getParser().ParseExpression(ShiftAmount)) {
+ Error(E, "malformed rotate expression");
+ return MatchOperand_ParseFail;
+ }
+ const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(ShiftAmount);
+ if (!CE) {
+ Error(E, "rotate amount must be an immediate");
+ return MatchOperand_ParseFail;
+ }
+
+ int64_t Val = CE->getValue();
+ // Shift amount must be in {0, 8, 16, 24} (0 is undocumented extension)
+ // normally, zero is represented in asm by omitting the rotate operand
+ // entirely.
+ if (Val != 8 && Val != 16 && Val != 24 && Val != 0) {
+ Error(E, "'ror' rotate amount must be 8, 16, or 24");
+ return MatchOperand_ParseFail;
+ }
+
+ E = Parser.getTok().getLoc();
+ Operands.push_back(ARMOperand::CreateRotImm(Val, S, E));
+
+ return MatchOperand_Success;
+}
+
+ARMAsmParser::OperandMatchResultTy ARMAsmParser::
+parseBitfield(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
+ SMLoc S = Parser.getTok().getLoc();
+ // The bitfield descriptor is really two operands, the LSB and the width.
+ if (Parser.getTok().isNot(AsmToken::Hash)) {
+ Error(Parser.getTok().getLoc(), "'#' expected");
+ return MatchOperand_ParseFail;
+ }
+ Parser.Lex(); // Eat hash token.
+
+ const MCExpr *LSBExpr;
+ SMLoc E = Parser.getTok().getLoc();
+ if (getParser().ParseExpression(LSBExpr)) {
+ Error(E, "malformed immediate expression");
+ return MatchOperand_ParseFail;
+ }
+ const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(LSBExpr);
+ if (!CE) {
+ Error(E, "'lsb' operand must be an immediate");
+ return MatchOperand_ParseFail;
+ }
+
+ int64_t LSB = CE->getValue();
+ // The LSB must be in the range [0,31]
+ if (LSB < 0 || LSB > 31) {
+ Error(E, "'lsb' operand must be in the range [0,31]");
+ return MatchOperand_ParseFail;
+ }
+ E = Parser.getTok().getLoc();
+
+ // Expect another immediate operand.
+ if (Parser.getTok().isNot(AsmToken::Comma)) {
+ Error(Parser.getTok().getLoc(), "too few operands");
+ return MatchOperand_ParseFail;
+ }
+ Parser.Lex(); // Eat hash token.
+ if (Parser.getTok().isNot(AsmToken::Hash)) {
+ Error(Parser.getTok().getLoc(), "'#' expected");
+ return MatchOperand_ParseFail;
+ }
+ Parser.Lex(); // Eat hash token.
+
+ const MCExpr *WidthExpr;
+ if (getParser().ParseExpression(WidthExpr)) {
+ Error(E, "malformed immediate expression");
+ return MatchOperand_ParseFail;
+ }
+ CE = dyn_cast<MCConstantExpr>(WidthExpr);
+ if (!CE) {
+ Error(E, "'width' operand must be an immediate");
+ return MatchOperand_ParseFail;
+ }
+
+ int64_t Width = CE->getValue();
+ // The LSB must be in the range [1,32-lsb]
+ if (Width < 1 || Width > 32 - LSB) {
+ Error(E, "'width' operand must be in the range [1,32-lsb]");
+ return MatchOperand_ParseFail;
+ }
+ E = Parser.getTok().getLoc();
+
+ Operands.push_back(ARMOperand::CreateBitfield(LSB, Width, S, E));
return MatchOperand_Success;
}
-/// CvtLdWriteBackRegAddrMode2 - Convert parsed operands to MCInst.
+ARMAsmParser::OperandMatchResultTy ARMAsmParser::
+parsePostIdxReg(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
+ // Check for a post-index addressing register operand. Specifically:
+ // postidx_reg := '+' register {, shift}
+ // | '-' register {, shift}
+ // | register {, shift}
+
+ // This method must return MatchOperand_NoMatch without consuming any tokens
+ // in the case where there is no match, as other alternatives take other
+ // parse methods.
+ AsmToken Tok = Parser.getTok();
+ SMLoc S = Tok.getLoc();
+ bool haveEaten = false;
+ bool isAdd = true;
+ int Reg = -1;
+ if (Tok.is(AsmToken::Plus)) {
+ Parser.Lex(); // Eat the '+' token.
+ haveEaten = true;
+ } else if (Tok.is(AsmToken::Minus)) {
+ Parser.Lex(); // Eat the '-' token.
+ isAdd = false;
+ haveEaten = true;
+ }
+ if (Parser.getTok().is(AsmToken::Identifier))
+ Reg = tryParseRegister();
+ if (Reg == -1) {
+ if (!haveEaten)
+ return MatchOperand_NoMatch;
+ Error(Parser.getTok().getLoc(), "register expected");
+ return MatchOperand_ParseFail;
+ }
+ SMLoc E = Parser.getTok().getLoc();
+
+ ARM_AM::ShiftOpc ShiftTy = ARM_AM::no_shift;
+ unsigned ShiftImm = 0;
+ if (Parser.getTok().is(AsmToken::Comma)) {
+ Parser.Lex(); // Eat the ','.
+ if (parseMemRegOffsetShift(ShiftTy, ShiftImm))
+ return MatchOperand_ParseFail;
+ }
+
+ Operands.push_back(ARMOperand::CreatePostIdxReg(Reg, isAdd, ShiftTy,
+ ShiftImm, S, E));
+
+ return MatchOperand_Success;
+}
+
+ARMAsmParser::OperandMatchResultTy ARMAsmParser::
+parseAM3Offset(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
+ // Check for a post-index addressing register operand. Specifically:
+ // am3offset := '+' register
+ // | '-' register
+ // | register
+ // | # imm
+ // | # + imm
+ // | # - imm
+
+ // This method must return MatchOperand_NoMatch without consuming any tokens
+ // in the case where there is no match, as other alternatives take other
+ // parse methods.
+ AsmToken Tok = Parser.getTok();
+ SMLoc S = Tok.getLoc();
+
+ // Do immediates first, as we always parse those if we have a '#'.
+ if (Parser.getTok().is(AsmToken::Hash)) {
+ Parser.Lex(); // Eat the '#'.
+ // Explicitly look for a '-', as we need to encode negative zero
+ // differently.
+ bool isNegative = Parser.getTok().is(AsmToken::Minus);
+ const MCExpr *Offset;
+ if (getParser().ParseExpression(Offset))
+ return MatchOperand_ParseFail;
+ const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(Offset);
+ if (!CE) {
+ Error(S, "constant expression expected");
+ return MatchOperand_ParseFail;
+ }
+ SMLoc E = Tok.getLoc();
+ // Negative zero is encoded as the flag value INT32_MIN.
+ int32_t Val = CE->getValue();
+ if (isNegative && Val == 0)
+ Val = INT32_MIN;
+
+ Operands.push_back(
+ ARMOperand::CreateImm(MCConstantExpr::Create(Val, getContext()), S, E));
+
+ return MatchOperand_Success;
+ }
+
+
+ bool haveEaten = false;
+ bool isAdd = true;
+ int Reg = -1;
+ if (Tok.is(AsmToken::Plus)) {
+ Parser.Lex(); // Eat the '+' token.
+ haveEaten = true;
+ } else if (Tok.is(AsmToken::Minus)) {
+ Parser.Lex(); // Eat the '-' token.
+ isAdd = false;
+ haveEaten = true;
+ }
+ if (Parser.getTok().is(AsmToken::Identifier))
+ Reg = tryParseRegister();
+ if (Reg == -1) {
+ if (!haveEaten)
+ return MatchOperand_NoMatch;
+ Error(Parser.getTok().getLoc(), "register expected");
+ return MatchOperand_ParseFail;
+ }
+ SMLoc E = Parser.getTok().getLoc();
+
+ Operands.push_back(ARMOperand::CreatePostIdxReg(Reg, isAdd, ARM_AM::no_shift,
+ 0, S, E));
+
+ return MatchOperand_Success;
+}
+
+/// cvtT2LdrdPre - Convert parsed operands to MCInst.
+/// Needed here because the Asm Gen Matcher can't handle properly tied operands
+/// when they refer multiple MIOperands inside a single one.
+bool ARMAsmParser::
+cvtT2LdrdPre(MCInst &Inst, unsigned Opcode,
+ const SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
+ // Rt, Rt2
+ ((ARMOperand*)Operands[2])->addRegOperands(Inst, 1);
+ ((ARMOperand*)Operands[3])->addRegOperands(Inst, 1);
+ // Create a writeback register dummy placeholder.
+ Inst.addOperand(MCOperand::CreateReg(0));
+ // addr
+ ((ARMOperand*)Operands[4])->addMemImm8s4OffsetOperands(Inst, 2);
+ // pred
+ ((ARMOperand*)Operands[1])->addCondCodeOperands(Inst, 2);
+ return true;
+}
+
+/// cvtT2StrdPre - Convert parsed operands to MCInst.
+/// Needed here because the Asm Gen Matcher can't handle properly tied operands
+/// when they refer multiple MIOperands inside a single one.
+bool ARMAsmParser::
+cvtT2StrdPre(MCInst &Inst, unsigned Opcode,
+ const SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
+ // Create a writeback register dummy placeholder.
+ Inst.addOperand(MCOperand::CreateReg(0));
+ // Rt, Rt2
+ ((ARMOperand*)Operands[2])->addRegOperands(Inst, 1);
+ ((ARMOperand*)Operands[3])->addRegOperands(Inst, 1);
+ // addr
+ ((ARMOperand*)Operands[4])->addMemImm8s4OffsetOperands(Inst, 2);
+ // pred
+ ((ARMOperand*)Operands[1])->addCondCodeOperands(Inst, 2);
+ return true;
+}
+
+/// cvtLdWriteBackRegT2AddrModeImm8 - Convert parsed operands to MCInst.
/// Needed here because the Asm Gen Matcher can't handle properly tied operands
/// when they refer multiple MIOperands inside a single one.
bool ARMAsmParser::
-CvtLdWriteBackRegAddrMode2(MCInst &Inst, unsigned Opcode,
+cvtLdWriteBackRegT2AddrModeImm8(MCInst &Inst, unsigned Opcode,
const SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
((ARMOperand*)Operands[2])->addRegOperands(Inst, 1);
// Create a writeback register dummy placeholder.
Inst.addOperand(MCOperand::CreateImm(0));
- ((ARMOperand*)Operands[3])->addMemMode2Operands(Inst, 3);
+ ((ARMOperand*)Operands[3])->addMemImm8OffsetOperands(Inst, 2);
((ARMOperand*)Operands[1])->addCondCodeOperands(Inst, 2);
return true;
}
-/// CvtStWriteBackRegAddrMode2 - Convert parsed operands to MCInst.
+/// cvtStWriteBackRegT2AddrModeImm8 - Convert parsed operands to MCInst.
/// Needed here because the Asm Gen Matcher can't handle properly tied operands
/// when they refer multiple MIOperands inside a single one.
bool ARMAsmParser::
-CvtStWriteBackRegAddrMode2(MCInst &Inst, unsigned Opcode,
+cvtStWriteBackRegT2AddrModeImm8(MCInst &Inst, unsigned Opcode,
const SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
// Create a writeback register dummy placeholder.
Inst.addOperand(MCOperand::CreateImm(0));
((ARMOperand*)Operands[2])->addRegOperands(Inst, 1);
- ((ARMOperand*)Operands[3])->addMemMode2Operands(Inst, 3);
+ ((ARMOperand*)Operands[3])->addMemImm8OffsetOperands(Inst, 2);
((ARMOperand*)Operands[1])->addCondCodeOperands(Inst, 2);
return true;
}
-/// CvtLdWriteBackRegAddrMode3 - Convert parsed operands to MCInst.
+/// cvtLdWriteBackRegAddrMode2 - Convert parsed operands to MCInst.
/// Needed here because the Asm Gen Matcher can't handle properly tied operands
/// when they refer multiple MIOperands inside a single one.
bool ARMAsmParser::
-CvtLdWriteBackRegAddrMode3(MCInst &Inst, unsigned Opcode,
+cvtLdWriteBackRegAddrMode2(MCInst &Inst, unsigned Opcode,
const SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
((ARMOperand*)Operands[2])->addRegOperands(Inst, 1);
// Create a writeback register dummy placeholder.
Inst.addOperand(MCOperand::CreateImm(0));
- ((ARMOperand*)Operands[3])->addMemMode3Operands(Inst, 3);
+ ((ARMOperand*)Operands[3])->addAddrMode2Operands(Inst, 3);
+ ((ARMOperand*)Operands[1])->addCondCodeOperands(Inst, 2);
+ return true;
+}
+
+/// cvtLdWriteBackRegAddrModeImm12 - Convert parsed operands to MCInst.
+/// Needed here because the Asm Gen Matcher can't handle properly tied operands
+/// when they refer multiple MIOperands inside a single one.
+bool ARMAsmParser::
+cvtLdWriteBackRegAddrModeImm12(MCInst &Inst, unsigned Opcode,
+ const SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
+ ((ARMOperand*)Operands[2])->addRegOperands(Inst, 1);
+
+ // Create a writeback register dummy placeholder.
+ Inst.addOperand(MCOperand::CreateImm(0));
+
+ ((ARMOperand*)Operands[3])->addMemImm12OffsetOperands(Inst, 2);
+ ((ARMOperand*)Operands[1])->addCondCodeOperands(Inst, 2);
+ return true;
+}
+
+
+/// cvtStWriteBackRegAddrModeImm12 - Convert parsed operands to MCInst.
+/// Needed here because the Asm Gen Matcher can't handle properly tied operands
+/// when they refer multiple MIOperands inside a single one.
+bool ARMAsmParser::
+cvtStWriteBackRegAddrModeImm12(MCInst &Inst, unsigned Opcode,
+ const SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
+ // Create a writeback register dummy placeholder.
+ Inst.addOperand(MCOperand::CreateImm(0));
+ ((ARMOperand*)Operands[2])->addRegOperands(Inst, 1);
+ ((ARMOperand*)Operands[3])->addMemImm12OffsetOperands(Inst, 2);
+ ((ARMOperand*)Operands[1])->addCondCodeOperands(Inst, 2);
+ return true;
+}
+
+/// cvtStWriteBackRegAddrMode2 - Convert parsed operands to MCInst.
+/// Needed here because the Asm Gen Matcher can't handle properly tied operands
+/// when they refer multiple MIOperands inside a single one.
+bool ARMAsmParser::
+cvtStWriteBackRegAddrMode2(MCInst &Inst, unsigned Opcode,
+ const SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
+ // Create a writeback register dummy placeholder.
+ Inst.addOperand(MCOperand::CreateImm(0));
+ ((ARMOperand*)Operands[2])->addRegOperands(Inst, 1);
+ ((ARMOperand*)Operands[3])->addAddrMode2Operands(Inst, 3);
((ARMOperand*)Operands[1])->addCondCodeOperands(Inst, 2);
return true;
}
-/// CvtStWriteBackRegAddrMode3 - Convert parsed operands to MCInst.
+/// cvtStWriteBackRegAddrMode3 - Convert parsed operands to MCInst.
/// Needed here because the Asm Gen Matcher can't handle properly tied operands
/// when they refer multiple MIOperands inside a single one.
bool ARMAsmParser::
-CvtStWriteBackRegAddrMode3(MCInst &Inst, unsigned Opcode,
+cvtStWriteBackRegAddrMode3(MCInst &Inst, unsigned Opcode,
const SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
// Create a writeback register dummy placeholder.
Inst.addOperand(MCOperand::CreateImm(0));
((ARMOperand*)Operands[2])->addRegOperands(Inst, 1);
- ((ARMOperand*)Operands[3])->addMemMode3Operands(Inst, 3);
+ ((ARMOperand*)Operands[3])->addAddrMode3Operands(Inst, 3);
+ ((ARMOperand*)Operands[1])->addCondCodeOperands(Inst, 2);
+ return true;
+}
+
+/// cvtLdExtTWriteBackImm - Convert parsed operands to MCInst.
+/// Needed here because the Asm Gen Matcher can't handle properly tied operands
+/// when they refer multiple MIOperands inside a single one.
+bool ARMAsmParser::
+cvtLdExtTWriteBackImm(MCInst &Inst, unsigned Opcode,
+ const SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
+ // Rt
+ ((ARMOperand*)Operands[2])->addRegOperands(Inst, 1);
+ // Create a writeback register dummy placeholder.
+ Inst.addOperand(MCOperand::CreateImm(0));
+ // addr
+ ((ARMOperand*)Operands[3])->addMemNoOffsetOperands(Inst, 1);
+ // offset
+ ((ARMOperand*)Operands[4])->addPostIdxImm8Operands(Inst, 1);
+ // pred
+ ((ARMOperand*)Operands[1])->addCondCodeOperands(Inst, 2);
+ return true;
+}
+
+/// cvtLdExtTWriteBackReg - Convert parsed operands to MCInst.
+/// Needed here because the Asm Gen Matcher can't handle properly tied operands
+/// when they refer multiple MIOperands inside a single one.
+bool ARMAsmParser::
+cvtLdExtTWriteBackReg(MCInst &Inst, unsigned Opcode,
+ const SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
+ // Rt
+ ((ARMOperand*)Operands[2])->addRegOperands(Inst, 1);
+ // Create a writeback register dummy placeholder.
+ Inst.addOperand(MCOperand::CreateImm(0));
+ // addr
+ ((ARMOperand*)Operands[3])->addMemNoOffsetOperands(Inst, 1);
+ // offset
+ ((ARMOperand*)Operands[4])->addPostIdxRegOperands(Inst, 2);
+ // pred
+ ((ARMOperand*)Operands[1])->addCondCodeOperands(Inst, 2);
+ return true;
+}
+
+/// cvtStExtTWriteBackImm - Convert parsed operands to MCInst.
+/// Needed here because the Asm Gen Matcher can't handle properly tied operands
+/// when they refer multiple MIOperands inside a single one.
+bool ARMAsmParser::
+cvtStExtTWriteBackImm(MCInst &Inst, unsigned Opcode,
+ const SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
+ // Create a writeback register dummy placeholder.
+ Inst.addOperand(MCOperand::CreateImm(0));
+ // Rt
+ ((ARMOperand*)Operands[2])->addRegOperands(Inst, 1);
+ // addr
+ ((ARMOperand*)Operands[3])->addMemNoOffsetOperands(Inst, 1);
+ // offset
+ ((ARMOperand*)Operands[4])->addPostIdxImm8Operands(Inst, 1);
+ // pred
+ ((ARMOperand*)Operands[1])->addCondCodeOperands(Inst, 2);
+ return true;
+}
+
+/// cvtStExtTWriteBackReg - Convert parsed operands to MCInst.
+/// Needed here because the Asm Gen Matcher can't handle properly tied operands
+/// when they refer multiple MIOperands inside a single one.
+bool ARMAsmParser::
+cvtStExtTWriteBackReg(MCInst &Inst, unsigned Opcode,
+ const SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
+ // Create a writeback register dummy placeholder.
+ Inst.addOperand(MCOperand::CreateImm(0));
+ // Rt
+ ((ARMOperand*)Operands[2])->addRegOperands(Inst, 1);
+ // addr
+ ((ARMOperand*)Operands[3])->addMemNoOffsetOperands(Inst, 1);
+ // offset
+ ((ARMOperand*)Operands[4])->addPostIdxRegOperands(Inst, 2);
+ // pred
+ ((ARMOperand*)Operands[1])->addCondCodeOperands(Inst, 2);
+ return true;
+}
+
+/// cvtLdrdPre - Convert parsed operands to MCInst.
+/// Needed here because the Asm Gen Matcher can't handle properly tied operands
+/// when they refer multiple MIOperands inside a single one.
+bool ARMAsmParser::
+cvtLdrdPre(MCInst &Inst, unsigned Opcode,
+ const SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
+ // Rt, Rt2
+ ((ARMOperand*)Operands[2])->addRegOperands(Inst, 1);
+ ((ARMOperand*)Operands[3])->addRegOperands(Inst, 1);
+ // Create a writeback register dummy placeholder.
+ Inst.addOperand(MCOperand::CreateImm(0));
+ // addr
+ ((ARMOperand*)Operands[4])->addAddrMode3Operands(Inst, 3);
+ // pred
+ ((ARMOperand*)Operands[1])->addCondCodeOperands(Inst, 2);
+ return true;
+}
+
+/// cvtStrdPre - Convert parsed operands to MCInst.
+/// Needed here because the Asm Gen Matcher can't handle properly tied operands
+/// when they refer multiple MIOperands inside a single one.
+bool ARMAsmParser::
+cvtStrdPre(MCInst &Inst, unsigned Opcode,
+ const SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
+ // Create a writeback register dummy placeholder.
+ Inst.addOperand(MCOperand::CreateImm(0));
+ // Rt, Rt2
+ ((ARMOperand*)Operands[2])->addRegOperands(Inst, 1);
+ ((ARMOperand*)Operands[3])->addRegOperands(Inst, 1);
+ // addr
+ ((ARMOperand*)Operands[4])->addAddrMode3Operands(Inst, 3);
+ // pred
((ARMOperand*)Operands[1])->addCondCodeOperands(Inst, 2);
return true;
}
+/// cvtLdWriteBackRegAddrMode3 - Convert parsed operands to MCInst.
+/// Needed here because the Asm Gen Matcher can't handle properly tied operands
+/// when they refer multiple MIOperands inside a single one.
+bool ARMAsmParser::
+cvtLdWriteBackRegAddrMode3(MCInst &Inst, unsigned Opcode,
+ const SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
+ ((ARMOperand*)Operands[2])->addRegOperands(Inst, 1);
+ // Create a writeback register dummy placeholder.
+ Inst.addOperand(MCOperand::CreateImm(0));
+ ((ARMOperand*)Operands[3])->addAddrMode3Operands(Inst, 3);
+ ((ARMOperand*)Operands[1])->addCondCodeOperands(Inst, 2);
+ return true;
+}
+
+/// cvtThumbMultiple- Convert parsed operands to MCInst.
+/// Needed here because the Asm Gen Matcher can't handle properly tied operands
+/// when they refer multiple MIOperands inside a single one.
+bool ARMAsmParser::
+cvtThumbMultiply(MCInst &Inst, unsigned Opcode,
+ const SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
+ // The second source operand must be the same register as the destination
+ // operand.
+ if (Operands.size() == 6 &&
+ (((ARMOperand*)Operands[3])->getReg() !=
+ ((ARMOperand*)Operands[5])->getReg()) &&
+ (((ARMOperand*)Operands[3])->getReg() !=
+ ((ARMOperand*)Operands[4])->getReg())) {
+ Error(Operands[3]->getStartLoc(),
+ "destination register must match source register");
+ return false;
+ }
+ ((ARMOperand*)Operands[3])->addRegOperands(Inst, 1);
+ ((ARMOperand*)Operands[1])->addCCOutOperands(Inst, 1);
+ ((ARMOperand*)Operands[4])->addRegOperands(Inst, 1);
+ // If we have a three-operand form, use that, else the second source operand
+ // is just the destination operand again.
+ if (Operands.size() == 6)
+ ((ARMOperand*)Operands[5])->addRegOperands(Inst, 1);
+ else
+ Inst.addOperand(Inst.getOperand(0));
+ ((ARMOperand*)Operands[2])->addCondCodeOperands(Inst, 2);
+
+ return true;
+}
+
/// Parse an ARM memory expression, return false if successful else return true
/// or an error. The first token must be a '[' when called.
-///
-/// TODO Only preindexing and postindexing addressing are started, unindexed
-/// with option, etc are still to do.
bool ARMAsmParser::
-ParseMemory(SmallVectorImpl<MCParsedAsmOperand*> &Operands,
- ARMII::AddrMode AddrMode = ARMII::AddrModeNone) {
+parseMemory(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
SMLoc S, E;
assert(Parser.getTok().is(AsmToken::LBrac) &&
"Token is not a Left Bracket");
@@ -1552,185 +3105,178 @@ ParseMemory(SmallVectorImpl<MCParsedAsmOperand*> &Operands,
Parser.Lex(); // Eat left bracket token.
const AsmToken &BaseRegTok = Parser.getTok();
- if (BaseRegTok.isNot(AsmToken::Identifier)) {
- Error(BaseRegTok.getLoc(), "register expected");
- return true;
- }
- int BaseRegNum = TryParseRegister();
- if (BaseRegNum == -1) {
- Error(BaseRegTok.getLoc(), "register expected");
- return true;
- }
+ int BaseRegNum = tryParseRegister();
+ if (BaseRegNum == -1)
+ return Error(BaseRegTok.getLoc(), "register expected");
// The next token must either be a comma or a closing bracket.
const AsmToken &Tok = Parser.getTok();
if (!Tok.is(AsmToken::Comma) && !Tok.is(AsmToken::RBrac))
- return true;
+ return Error(Tok.getLoc(), "malformed memory operand");
- bool Preindexed = false;
- bool Postindexed = false;
- bool OffsetIsReg = false;
- bool Negative = false;
- bool Writeback = false;
- ARMOperand *WBOp = 0;
- int OffsetRegNum = -1;
- bool OffsetRegShifted = false;
- enum ARM_AM::ShiftOpc ShiftType = ARM_AM::lsl;
- const MCExpr *ShiftAmount = 0;
- const MCExpr *Offset = 0;
-
- // First look for preindexed address forms, that is after the "[Rn" we now
- // have to see if the next token is a comma.
- if (Tok.is(AsmToken::Comma)) {
- Preindexed = true;
- Parser.Lex(); // Eat comma token.
-
- if (ParseMemoryOffsetReg(Negative, OffsetRegShifted, ShiftType, ShiftAmount,
- Offset, OffsetIsReg, OffsetRegNum, E))
- return true;
- const AsmToken &RBracTok = Parser.getTok();
- if (RBracTok.isNot(AsmToken::RBrac)) {
- Error(RBracTok.getLoc(), "']' expected");
- return true;
- }
- E = RBracTok.getLoc();
+ if (Tok.is(AsmToken::RBrac)) {
+ E = Tok.getLoc();
Parser.Lex(); // Eat right bracket token.
- const AsmToken &ExclaimTok = Parser.getTok();
- if (ExclaimTok.is(AsmToken::Exclaim)) {
- // None of addrmode3 instruction uses "!"
- if (AddrMode == ARMII::AddrMode3)
- return true;
+ Operands.push_back(ARMOperand::CreateMem(BaseRegNum, 0, 0, ARM_AM::no_shift,
+ 0, 0, false, S, E));
- WBOp = ARMOperand::CreateToken(ExclaimTok.getString(),
- ExclaimTok.getLoc());
- Writeback = true;
- Parser.Lex(); // Eat exclaim token
- } else { // In addressing mode 2, pre-indexed mode always end with "!"
- if (AddrMode == ARMII::AddrMode2)
- Preindexed = false;
+ // If there's a pre-indexing writeback marker, '!', just add it as a token
+ // operand. It's rather odd, but syntactically valid.
+ if (Parser.getTok().is(AsmToken::Exclaim)) {
+ Operands.push_back(ARMOperand::CreateToken("!",Parser.getTok().getLoc()));
+ Parser.Lex(); // Eat the '!'.
}
- } else {
- // The "[Rn" we have so far was not followed by a comma.
- // If there's anything other than the right brace, this is a post indexing
- // addressing form.
- E = Tok.getLoc();
- Parser.Lex(); // Eat right bracket token.
+ return false;
+ }
- const AsmToken &NextTok = Parser.getTok();
+ assert(Tok.is(AsmToken::Comma) && "Lost comma in memory operand?!");
+ Parser.Lex(); // Eat the comma.
- if (NextTok.isNot(AsmToken::EndOfStatement)) {
- Postindexed = true;
- Writeback = true;
+ // If we have a ':', it's an alignment specifier.
+ if (Parser.getTok().is(AsmToken::Colon)) {
+ Parser.Lex(); // Eat the ':'.
+ E = Parser.getTok().getLoc();
- if (NextTok.isNot(AsmToken::Comma)) {
- Error(NextTok.getLoc(), "',' expected");
- return true;
- }
-
- Parser.Lex(); // Eat comma token.
+ const MCExpr *Expr;
+ if (getParser().ParseExpression(Expr))
+ return true;
- if (ParseMemoryOffsetReg(Negative, OffsetRegShifted, ShiftType,
- ShiftAmount, Offset, OffsetIsReg, OffsetRegNum,
- E))
- return true;
+ // The expression has to be a constant. Memory references with relocations
+ // don't come through here, as they use the <label> forms of the relevant
+ // instructions.
+ const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(Expr);
+ if (!CE)
+ return Error (E, "constant expression expected");
+
+ unsigned Align = 0;
+ switch (CE->getValue()) {
+ default:
+ return Error(E, "alignment specifier must be 64, 128, or 256 bits");
+ case 64: Align = 8; break;
+ case 128: Align = 16; break;
+ case 256: Align = 32; break;
}
- }
- // Force Offset to exist if used.
- if (!OffsetIsReg) {
- if (!Offset)
- Offset = MCConstantExpr::Create(0, getContext());
- } else {
- if (AddrMode == ARMII::AddrMode3 && OffsetRegShifted) {
- Error(E, "shift amount not supported");
- return true;
+ // Now we should have the closing ']'
+ E = Parser.getTok().getLoc();
+ if (Parser.getTok().isNot(AsmToken::RBrac))
+ return Error(E, "']' expected");
+ Parser.Lex(); // Eat right bracket token.
+
+ // Don't worry about range checking the value here. That's handled by
+ // the is*() predicates.
+ Operands.push_back(ARMOperand::CreateMem(BaseRegNum, 0, 0,
+ ARM_AM::no_shift, 0, Align,
+ false, S, E));
+
+ // If there's a pre-indexing writeback marker, '!', just add it as a token
+ // operand.
+ if (Parser.getTok().is(AsmToken::Exclaim)) {
+ Operands.push_back(ARMOperand::CreateToken("!",Parser.getTok().getLoc()));
+ Parser.Lex(); // Eat the '!'.
}
+
+ return false;
}
- Operands.push_back(ARMOperand::CreateMem(AddrMode, BaseRegNum, OffsetIsReg,
- Offset, OffsetRegNum, OffsetRegShifted,
- ShiftType, ShiftAmount, Preindexed,
- Postindexed, Negative, Writeback, S, E));
- if (WBOp)
- Operands.push_back(WBOp);
+ // If we have a '#', it's an immediate offset, else assume it's a register
+ // offset.
+ if (Parser.getTok().is(AsmToken::Hash)) {
+ Parser.Lex(); // Eat the '#'.
+ E = Parser.getTok().getLoc();
- return false;
-}
+ bool isNegative = getParser().getTok().is(AsmToken::Minus);
+ const MCExpr *Offset;
+ if (getParser().ParseExpression(Offset))
+ return true;
+
+ // The expression has to be a constant. Memory references with relocations
+ // don't come through here, as they use the <label> forms of the relevant
+ // instructions.
+ const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(Offset);
+ if (!CE)
+ return Error (E, "constant expression expected");
+
+ // If the constant was #-0, represent it as INT32_MIN.
+ int32_t Val = CE->getValue();
+ if (isNegative && Val == 0)
+ CE = MCConstantExpr::Create(INT32_MIN, getContext());
+
+ // Now we should have the closing ']'
+ E = Parser.getTok().getLoc();
+ if (Parser.getTok().isNot(AsmToken::RBrac))
+ return Error(E, "']' expected");
+ Parser.Lex(); // Eat right bracket token.
-/// Parse the offset of a memory operand after we have seen "[Rn," or "[Rn],"
-/// we will parse the following (were +/- means that a plus or minus is
-/// optional):
-/// +/-Rm
-/// +/-Rm, shift
-/// #offset
-/// we return false on success or an error otherwise.
-bool ARMAsmParser::ParseMemoryOffsetReg(bool &Negative,
- bool &OffsetRegShifted,
- enum ARM_AM::ShiftOpc &ShiftType,
- const MCExpr *&ShiftAmount,
- const MCExpr *&Offset,
- bool &OffsetIsReg,
- int &OffsetRegNum,
- SMLoc &E) {
- Negative = false;
- OffsetRegShifted = false;
- OffsetIsReg = false;
- OffsetRegNum = -1;
- const AsmToken &NextTok = Parser.getTok();
- E = NextTok.getLoc();
- if (NextTok.is(AsmToken::Plus))
- Parser.Lex(); // Eat plus token.
- else if (NextTok.is(AsmToken::Minus)) {
- Negative = true;
- Parser.Lex(); // Eat minus token
- }
- // See if there is a register following the "[Rn," or "[Rn]," we have so far.
- const AsmToken &OffsetRegTok = Parser.getTok();
- if (OffsetRegTok.is(AsmToken::Identifier)) {
- SMLoc CurLoc = OffsetRegTok.getLoc();
- OffsetRegNum = TryParseRegister();
- if (OffsetRegNum != -1) {
- OffsetIsReg = true;
- E = CurLoc;
+ // Don't worry about range checking the value here. That's handled by
+ // the is*() predicates.
+ Operands.push_back(ARMOperand::CreateMem(BaseRegNum, CE, 0,
+ ARM_AM::no_shift, 0, 0,
+ false, S, E));
+
+ // If there's a pre-indexing writeback marker, '!', just add it as a token
+ // operand.
+ if (Parser.getTok().is(AsmToken::Exclaim)) {
+ Operands.push_back(ARMOperand::CreateToken("!",Parser.getTok().getLoc()));
+ Parser.Lex(); // Eat the '!'.
}
- }
- // If we parsed a register as the offset then there can be a shift after that.
- if (OffsetRegNum != -1) {
- // Look for a comma then a shift
- const AsmToken &Tok = Parser.getTok();
- if (Tok.is(AsmToken::Comma)) {
- Parser.Lex(); // Eat comma token.
+ return false;
+ }
- const AsmToken &Tok = Parser.getTok();
- if (ParseShift(ShiftType, ShiftAmount, E))
- return Error(Tok.getLoc(), "shift expected");
- OffsetRegShifted = true;
- }
+ // The register offset is optionally preceded by a '+' or '-'
+ bool isNegative = false;
+ if (Parser.getTok().is(AsmToken::Minus)) {
+ isNegative = true;
+ Parser.Lex(); // Eat the '-'.
+ } else if (Parser.getTok().is(AsmToken::Plus)) {
+ // Nothing to do.
+ Parser.Lex(); // Eat the '+'.
}
- else { // the "[Rn," or "[Rn,]" we have so far was not followed by "Rm"
- // Look for #offset following the "[Rn," or "[Rn],"
- const AsmToken &HashTok = Parser.getTok();
- if (HashTok.isNot(AsmToken::Hash))
- return Error(HashTok.getLoc(), "'#' expected");
- Parser.Lex(); // Eat hash token.
+ E = Parser.getTok().getLoc();
+ int OffsetRegNum = tryParseRegister();
+ if (OffsetRegNum == -1)
+ return Error(E, "register expected");
+
+ // If there's a shift operator, handle it.
+ ARM_AM::ShiftOpc ShiftType = ARM_AM::no_shift;
+ unsigned ShiftImm = 0;
+ if (Parser.getTok().is(AsmToken::Comma)) {
+ Parser.Lex(); // Eat the ','.
+ if (parseMemRegOffsetShift(ShiftType, ShiftImm))
+ return true;
+ }
- if (getParser().ParseExpression(Offset))
- return true;
- E = SMLoc::getFromPointer(Parser.getTok().getLoc().getPointer() - 1);
+ // Now we should have the closing ']'
+ E = Parser.getTok().getLoc();
+ if (Parser.getTok().isNot(AsmToken::RBrac))
+ return Error(E, "']' expected");
+ Parser.Lex(); // Eat right bracket token.
+
+ Operands.push_back(ARMOperand::CreateMem(BaseRegNum, 0, OffsetRegNum,
+ ShiftType, ShiftImm, 0, isNegative,
+ S, E));
+
+ // If there's a pre-indexing writeback marker, '!', just add it as a token
+ // operand.
+ if (Parser.getTok().is(AsmToken::Exclaim)) {
+ Operands.push_back(ARMOperand::CreateToken("!",Parser.getTok().getLoc()));
+ Parser.Lex(); // Eat the '!'.
}
+
return false;
}
-/// ParseShift as one of these two:
+/// parseMemRegOffsetShift - one of these two:
/// ( lsl | lsr | asr | ror ) , # shift_amount
/// rrx
-/// and returns true if it parses a shift otherwise it returns false.
-bool ARMAsmParser::ParseShift(ARM_AM::ShiftOpc &St,
- const MCExpr *&ShiftAmount, SMLoc &E) {
+/// return true if it parses a shift otherwise it returns false.
+bool ARMAsmParser::parseMemRegOffsetShift(ARM_AM::ShiftOpc &St,
+ unsigned &Amount) {
+ SMLoc Loc = Parser.getTok().getLoc();
const AsmToken &Tok = Parser.getTok();
if (Tok.isNot(AsmToken::Identifier))
return true;
@@ -1746,28 +3292,86 @@ bool ARMAsmParser::ParseShift(ARM_AM::ShiftOpc &St,
else if (ShiftName == "rrx" || ShiftName == "RRX")
St = ARM_AM::rrx;
else
- return true;
+ return Error(Loc, "illegal shift operator");
Parser.Lex(); // Eat shift type token.
- // Rrx stands alone.
- if (St == ARM_AM::rrx)
- return false;
-
- // Otherwise, there must be a '#' and a shift amount.
- const AsmToken &HashTok = Parser.getTok();
- if (HashTok.isNot(AsmToken::Hash))
- return Error(HashTok.getLoc(), "'#' expected");
- Parser.Lex(); // Eat hash token.
+ // rrx stands alone.
+ Amount = 0;
+ if (St != ARM_AM::rrx) {
+ Loc = Parser.getTok().getLoc();
+ // A '#' and a shift amount.
+ const AsmToken &HashTok = Parser.getTok();
+ if (HashTok.isNot(AsmToken::Hash))
+ return Error(HashTok.getLoc(), "'#' expected");
+ Parser.Lex(); // Eat hash token.
- if (getParser().ParseExpression(ShiftAmount))
- return true;
+ const MCExpr *Expr;
+ if (getParser().ParseExpression(Expr))
+ return true;
+ // Range check the immediate.
+ // lsl, ror: 0 <= imm <= 31
+ // lsr, asr: 0 <= imm <= 32
+ const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(Expr);
+ if (!CE)
+ return Error(Loc, "shift amount must be an immediate");
+ int64_t Imm = CE->getValue();
+ if (Imm < 0 ||
+ ((St == ARM_AM::lsl || St == ARM_AM::ror) && Imm > 31) ||
+ ((St == ARM_AM::lsr || St == ARM_AM::asr) && Imm > 32))
+ return Error(Loc, "immediate shift value out of range");
+ Amount = Imm;
+ }
return false;
}
+/// parseFPImm - A floating point immediate expression operand.
+ARMAsmParser::OperandMatchResultTy ARMAsmParser::
+parseFPImm(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
+ SMLoc S = Parser.getTok().getLoc();
+
+ if (Parser.getTok().isNot(AsmToken::Hash))
+ return MatchOperand_NoMatch;
+ Parser.Lex(); // Eat the '#'.
+
+ // Handle negation, as that still comes through as a separate token.
+ bool isNegative = false;
+ if (Parser.getTok().is(AsmToken::Minus)) {
+ isNegative = true;
+ Parser.Lex();
+ }
+ const AsmToken &Tok = Parser.getTok();
+ if (Tok.is(AsmToken::Real)) {
+ APFloat RealVal(APFloat::IEEEdouble, Tok.getString());
+ uint64_t IntVal = RealVal.bitcastToAPInt().getZExtValue();
+ // If we had a '-' in front, toggle the sign bit.
+ IntVal ^= (uint64_t)isNegative << 63;
+ int Val = ARM_AM::getFP64Imm(APInt(64, IntVal));
+ Parser.Lex(); // Eat the token.
+ if (Val == -1) {
+ TokError("floating point value out of range");
+ return MatchOperand_ParseFail;
+ }
+ Operands.push_back(ARMOperand::CreateFPImm(Val, S, getContext()));
+ return MatchOperand_Success;
+ }
+ if (Tok.is(AsmToken::Integer)) {
+ int64_t Val = Tok.getIntVal();
+ Parser.Lex(); // Eat the token.
+ if (Val > 255 || Val < 0) {
+ TokError("encoded floating point value out of range");
+ return MatchOperand_ParseFail;
+ }
+ Operands.push_back(ARMOperand::CreateFPImm(Val, S, getContext()));
+ return MatchOperand_Success;
+ }
+
+ TokError("invalid floating point immediate");
+ return MatchOperand_ParseFail;
+}
/// Parse a arm instruction operand. For now this parses the operand regardless
/// of the mnemonic.
-bool ARMAsmParser::ParseOperand(SmallVectorImpl<MCParsedAsmOperand*> &Operands,
+bool ARMAsmParser::parseOperand(SmallVectorImpl<MCParsedAsmOperand*> &Operands,
StringRef Mnemonic) {
SMLoc S, E;
@@ -1787,13 +3391,20 @@ bool ARMAsmParser::ParseOperand(SmallVectorImpl<MCParsedAsmOperand*> &Operands,
Error(Parser.getTok().getLoc(), "unexpected token in operand");
return true;
case AsmToken::Identifier: {
- if (!TryParseRegisterWithWriteBack(Operands))
+ // If this is VMRS, check for the apsr_nzcv operand.
+ if (!tryParseRegisterWithWriteBack(Operands))
return false;
- int Res = TryParseShiftRegister(Operands);
+ int Res = tryParseShiftRegister(Operands);
if (Res == 0) // success
return false;
else if (Res == -1) // irrecoverable error
return true;
+ if (Mnemonic == "vmrs" && Parser.getTok().getString() == "apsr_nzcv") {
+ S = Parser.getTok().getLoc();
+ Parser.Lex();
+ Operands.push_back(ARMOperand::CreateToken("apsr_nzcv", S));
+ return false;
+ }
// Fall though for the Identifier case that is not a register or a
// special name.
@@ -1811,26 +3422,36 @@ bool ARMAsmParser::ParseOperand(SmallVectorImpl<MCParsedAsmOperand*> &Operands,
return false;
}
case AsmToken::LBrac:
- return ParseMemory(Operands);
+ return parseMemory(Operands);
case AsmToken::LCurly:
- return ParseRegisterList(Operands);
- case AsmToken::Hash:
+ return parseRegisterList(Operands);
+ case AsmToken::Hash: {
// #42 -> immediate.
// TODO: ":lower16:" and ":upper16:" modifiers after # before immediate
S = Parser.getTok().getLoc();
Parser.Lex();
+ bool isNegative = Parser.getTok().is(AsmToken::Minus);
const MCExpr *ImmVal;
if (getParser().ParseExpression(ImmVal))
return true;
+ const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(ImmVal);
+ if (!CE) {
+ Error(S, "constant expression expected");
+ return MatchOperand_ParseFail;
+ }
+ int32_t Val = CE->getValue();
+ if (isNegative && Val == 0)
+ ImmVal = MCConstantExpr::Create(INT32_MIN, getContext());
E = SMLoc::getFromPointer(Parser.getTok().getLoc().getPointer() - 1);
Operands.push_back(ARMOperand::CreateImm(ImmVal, S, E));
return false;
+ }
case AsmToken::Colon: {
// ":lower16:" and ":upper16:" expression prefixes
// FIXME: Check it's an expression prefix,
// e.g. (FOO - :lower16:BAR) isn't legal.
ARMMCExpr::VariantKind RefKind;
- if (ParsePrefix(RefKind))
+ if (parsePrefix(RefKind))
return true;
const MCExpr *SubExprVal;
@@ -1846,9 +3467,9 @@ bool ARMAsmParser::ParseOperand(SmallVectorImpl<MCParsedAsmOperand*> &Operands,
}
}
-// ParsePrefix - Parse ARM 16-bit relocations expression prefix, i.e.
+// parsePrefix - Parse ARM 16-bit relocations expression prefix, i.e.
// :lower16: and :upper16:.
-bool ARMAsmParser::ParsePrefix(ARMMCExpr::VariantKind &RefKind) {
+bool ARMAsmParser::parsePrefix(ARMMCExpr::VariantKind &RefKind) {
RefKind = ARMMCExpr::VK_ARM_None;
// :lower16: and :upper16: modifiers
@@ -1879,55 +3500,16 @@ bool ARMAsmParser::ParsePrefix(ARMMCExpr::VariantKind &RefKind) {
return false;
}
-const MCExpr *
-ARMAsmParser::ApplyPrefixToExpr(const MCExpr *E,
- MCSymbolRefExpr::VariantKind Variant) {
- // Recurse over the given expression, rebuilding it to apply the given variant
- // to the leftmost symbol.
- if (Variant == MCSymbolRefExpr::VK_None)
- return E;
-
- switch (E->getKind()) {
- case MCExpr::Target:
- llvm_unreachable("Can't handle target expr yet");
- case MCExpr::Constant:
- llvm_unreachable("Can't handle lower16/upper16 of constant yet");
-
- case MCExpr::SymbolRef: {
- const MCSymbolRefExpr *SRE = cast<MCSymbolRefExpr>(E);
-
- if (SRE->getKind() != MCSymbolRefExpr::VK_None)
- return 0;
-
- return MCSymbolRefExpr::Create(&SRE->getSymbol(), Variant, getContext());
- }
-
- case MCExpr::Unary:
- llvm_unreachable("Can't handle unary expressions yet");
-
- case MCExpr::Binary: {
- const MCBinaryExpr *BE = cast<MCBinaryExpr>(E);
- const MCExpr *LHS = ApplyPrefixToExpr(BE->getLHS(), Variant);
- const MCExpr *RHS = BE->getRHS();
- if (!LHS)
- return 0;
-
- return MCBinaryExpr::Create(BE->getOpcode(), LHS, RHS, getContext());
- }
- }
-
- assert(0 && "Invalid expression kind!");
- return 0;
-}
-
/// \brief Given a mnemonic, split out possible predication code and carry
/// setting letters to form a canonical mnemonic and flags.
//
// FIXME: Would be nice to autogen this.
-static StringRef SplitMnemonic(StringRef Mnemonic,
- unsigned &PredicationCode,
- bool &CarrySetting,
- unsigned &ProcessorIMod) {
+// FIXME: This is a bit of a maze of special cases.
+StringRef ARMAsmParser::splitMnemonic(StringRef Mnemonic,
+ unsigned &PredicationCode,
+ bool &CarrySetting,
+ unsigned &ProcessorIMod,
+ StringRef &ITMask) {
PredicationCode = ARMCC::AL;
CarrySetting = false;
ProcessorIMod = 0;
@@ -1935,23 +3517,22 @@ static StringRef SplitMnemonic(StringRef Mnemonic,
// Ignore some mnemonics we know aren't predicated forms.
//
// FIXME: Would be nice to autogen this.
- if (Mnemonic == "teq" || Mnemonic == "vceq" ||
- Mnemonic == "movs" ||
- Mnemonic == "svc" ||
- (Mnemonic == "mls" || Mnemonic == "smmls" || Mnemonic == "vcls" ||
- Mnemonic == "vmls" || Mnemonic == "vnmls") ||
- Mnemonic == "vacge" || Mnemonic == "vcge" ||
- Mnemonic == "vclt" ||
- Mnemonic == "vacgt" || Mnemonic == "vcgt" ||
- Mnemonic == "vcle" ||
- (Mnemonic == "smlal" || Mnemonic == "umaal" || Mnemonic == "umlal" ||
- Mnemonic == "vabal" || Mnemonic == "vmlal" || Mnemonic == "vpadal" ||
- Mnemonic == "vqdmlal" || Mnemonic == "bics"))
+ if ((Mnemonic == "movs" && isThumb()) ||
+ Mnemonic == "teq" || Mnemonic == "vceq" || Mnemonic == "svc" ||
+ Mnemonic == "mls" || Mnemonic == "smmls" || Mnemonic == "vcls" ||
+ Mnemonic == "vmls" || Mnemonic == "vnmls" || Mnemonic == "vacge" ||
+ Mnemonic == "vcge" || Mnemonic == "vclt" || Mnemonic == "vacgt" ||
+ Mnemonic == "vcgt" || Mnemonic == "vcle" || Mnemonic == "smlal" ||
+ Mnemonic == "umaal" || Mnemonic == "umlal" || Mnemonic == "vabal" ||
+ Mnemonic == "vmlal" || Mnemonic == "vpadal" || Mnemonic == "vqdmlal")
return Mnemonic;
// First, split out any predication code. Ignore mnemonics we know aren't
// predicated but do have a carry-set and so weren't caught above.
- if (Mnemonic != "adcs") {
+ if (Mnemonic != "adcs" && Mnemonic != "bics" && Mnemonic != "movs" &&
+ Mnemonic != "muls" && Mnemonic != "smlals" && Mnemonic != "smulls" &&
+ Mnemonic != "umlals" && Mnemonic != "umulls" && Mnemonic != "lsls" &&
+ Mnemonic != "sbcs" && Mnemonic != "rscs") {
unsigned CC = StringSwitch<unsigned>(Mnemonic.substr(Mnemonic.size()-2))
.Case("eq", ARMCC::EQ)
.Case("ne", ARMCC::NE)
@@ -1980,11 +3561,12 @@ static StringRef SplitMnemonic(StringRef Mnemonic,
// Next, determine if we have a carry setting bit. We explicitly ignore all
// the instructions we know end in 's'.
if (Mnemonic.endswith("s") &&
- !(Mnemonic == "asrs" || Mnemonic == "cps" || Mnemonic == "mls" ||
- Mnemonic == "movs" || Mnemonic == "mrs" || Mnemonic == "smmls" ||
- Mnemonic == "vabs" || Mnemonic == "vcls" || Mnemonic == "vmls" ||
- Mnemonic == "vmrs" || Mnemonic == "vnmls" || Mnemonic == "vqabs" ||
- Mnemonic == "vrecps" || Mnemonic == "vrsqrts")) {
+ !(Mnemonic == "cps" || Mnemonic == "mls" ||
+ Mnemonic == "mrs" || Mnemonic == "smmls" || Mnemonic == "vabs" ||
+ Mnemonic == "vcls" || Mnemonic == "vmls" || Mnemonic == "vmrs" ||
+ Mnemonic == "vnmls" || Mnemonic == "vqabs" || Mnemonic == "vrecps" ||
+ Mnemonic == "vrsqrts" || Mnemonic == "srs" ||
+ (Mnemonic == "movs" && isThumb()))) {
Mnemonic = Mnemonic.slice(0, Mnemonic.size() - 1);
CarrySetting = true;
}
@@ -2004,6 +3586,12 @@ static StringRef SplitMnemonic(StringRef Mnemonic,
}
}
+ // The "it" instruction has the condition mask on the end of the mnemonic.
+ if (Mnemonic.startswith("it")) {
+ ITMask = Mnemonic.slice(2, Mnemonic.size());
+ Mnemonic = Mnemonic.slice(0, 2);
+ }
+
return Mnemonic;
}
@@ -2012,37 +3600,154 @@ static StringRef SplitMnemonic(StringRef Mnemonic,
//
// FIXME: It would be nice to autogen this.
void ARMAsmParser::
-GetMnemonicAcceptInfo(StringRef Mnemonic, bool &CanAcceptCarrySet,
+getMnemonicAcceptInfo(StringRef Mnemonic, bool &CanAcceptCarrySet,
bool &CanAcceptPredicationCode) {
if (Mnemonic == "and" || Mnemonic == "lsl" || Mnemonic == "lsr" ||
Mnemonic == "rrx" || Mnemonic == "ror" || Mnemonic == "sub" ||
- Mnemonic == "smull" || Mnemonic == "add" || Mnemonic == "adc" ||
+ Mnemonic == "add" || Mnemonic == "adc" ||
Mnemonic == "mul" || Mnemonic == "bic" || Mnemonic == "asr" ||
- Mnemonic == "umlal" || Mnemonic == "orr" || Mnemonic == "mvn" ||
+ Mnemonic == "orr" || Mnemonic == "mvn" ||
Mnemonic == "rsb" || Mnemonic == "rsc" || Mnemonic == "orn" ||
- Mnemonic == "sbc" || Mnemonic == "mla" || Mnemonic == "umull" ||
- Mnemonic == "eor" || Mnemonic == "smlal" ||
- (Mnemonic == "mov" && !isThumbOne())) {
+ Mnemonic == "sbc" || Mnemonic == "eor" || Mnemonic == "neg" ||
+ (!isThumb() && (Mnemonic == "smull" || Mnemonic == "mov" ||
+ Mnemonic == "mla" || Mnemonic == "smlal" ||
+ Mnemonic == "umlal" || Mnemonic == "umull"))) {
CanAcceptCarrySet = true;
- } else {
+ } else
CanAcceptCarrySet = false;
- }
if (Mnemonic == "cbnz" || Mnemonic == "setend" || Mnemonic == "dmb" ||
Mnemonic == "cps" || Mnemonic == "mcr2" || Mnemonic == "it" ||
Mnemonic == "mcrr2" || Mnemonic == "cbz" || Mnemonic == "cdp2" ||
Mnemonic == "trap" || Mnemonic == "mrc2" || Mnemonic == "mrrc2" ||
- Mnemonic == "dsb" || Mnemonic == "movs" || Mnemonic == "isb" ||
- Mnemonic == "clrex" || Mnemonic.startswith("cps")) {
+ Mnemonic == "dsb" || Mnemonic == "isb" || Mnemonic == "setend" ||
+ (Mnemonic == "clrex" && !isThumb()) ||
+ (Mnemonic == "nop" && isThumbOne()) ||
+ ((Mnemonic == "pld" || Mnemonic == "pli" || Mnemonic == "pldw" ||
+ Mnemonic == "ldc2" || Mnemonic == "ldc2l" ||
+ Mnemonic == "stc2" || Mnemonic == "stc2l") && !isThumb()) ||
+ ((Mnemonic.startswith("rfe") || Mnemonic.startswith("srs")) &&
+ !isThumb()) ||
+ Mnemonic.startswith("cps") || (Mnemonic == "movs" && isThumbOne())) {
CanAcceptPredicationCode = false;
- } else {
+ } else
CanAcceptPredicationCode = true;
- }
- if (isThumb())
+ if (isThumb()) {
if (Mnemonic == "bkpt" || Mnemonic == "mcr" || Mnemonic == "mcrr" ||
Mnemonic == "mrc" || Mnemonic == "mrrc" || Mnemonic == "cdp")
CanAcceptPredicationCode = false;
+ }
+}
+
+bool ARMAsmParser::shouldOmitCCOutOperand(StringRef Mnemonic,
+ SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
+ // FIXME: This is all horribly hacky. We really need a better way to deal
+ // with optional operands like this in the matcher table.
+
+ // The 'mov' mnemonic is special. One variant has a cc_out operand, while
+ // another does not. Specifically, the MOVW instruction does not. So we
+ // special case it here and remove the defaulted (non-setting) cc_out
+ // operand if that's the instruction we're trying to match.
+ //
+ // We do this as post-processing of the explicit operands rather than just
+ // conditionally adding the cc_out in the first place because we need
+ // to check the type of the parsed immediate operand.
+ if (Mnemonic == "mov" && Operands.size() > 4 && !isThumb() &&
+ !static_cast<ARMOperand*>(Operands[4])->isARMSOImm() &&
+ static_cast<ARMOperand*>(Operands[4])->isImm0_65535Expr() &&
+ static_cast<ARMOperand*>(Operands[1])->getReg() == 0)
+ return true;
+
+ // Register-register 'add' for thumb does not have a cc_out operand
+ // when there are only two register operands.
+ if (isThumb() && Mnemonic == "add" && Operands.size() == 5 &&
+ static_cast<ARMOperand*>(Operands[3])->isReg() &&
+ static_cast<ARMOperand*>(Operands[4])->isReg() &&
+ static_cast<ARMOperand*>(Operands[1])->getReg() == 0)
+ return true;
+ // Register-register 'add' for thumb does not have a cc_out operand
+ // when it's an ADD Rdm, SP, {Rdm|#imm0_255} instruction. We do
+ // have to check the immediate range here since Thumb2 has a variant
+ // that can handle a different range and has a cc_out operand.
+ if (((isThumb() && Mnemonic == "add") ||
+ (isThumbTwo() && Mnemonic == "sub")) &&
+ Operands.size() == 6 &&
+ static_cast<ARMOperand*>(Operands[3])->isReg() &&
+ static_cast<ARMOperand*>(Operands[4])->isReg() &&
+ static_cast<ARMOperand*>(Operands[4])->getReg() == ARM::SP &&
+ static_cast<ARMOperand*>(Operands[1])->getReg() == 0 &&
+ (static_cast<ARMOperand*>(Operands[5])->isReg() ||
+ static_cast<ARMOperand*>(Operands[5])->isImm0_1020s4()))
+ return true;
+ // For Thumb2, add/sub immediate does not have a cc_out operand for the
+ // imm0_4095 variant. That's the least-preferred variant when
+ // selecting via the generic "add" mnemonic, so to know that we
+ // should remove the cc_out operand, we have to explicitly check that
+ // it's not one of the other variants. Ugh.
+ if (isThumbTwo() && (Mnemonic == "add" || Mnemonic == "sub") &&
+ Operands.size() == 6 &&
+ static_cast<ARMOperand*>(Operands[3])->isReg() &&
+ static_cast<ARMOperand*>(Operands[4])->isReg() &&
+ static_cast<ARMOperand*>(Operands[5])->isImm()) {
+ // Nest conditions rather than one big 'if' statement for readability.
+ //
+ // If either register is a high reg, it's either one of the SP
+ // variants (handled above) or a 32-bit encoding, so we just
+ // check against T3.
+ if ((!isARMLowRegister(static_cast<ARMOperand*>(Operands[3])->getReg()) ||
+ !isARMLowRegister(static_cast<ARMOperand*>(Operands[4])->getReg())) &&
+ static_cast<ARMOperand*>(Operands[5])->isT2SOImm())
+ return false;
+ // If both registers are low, we're in an IT block, and the immediate is
+ // in range, we should use encoding T1 instead, which has a cc_out.
+ if (inITBlock() &&
+ isARMLowRegister(static_cast<ARMOperand*>(Operands[3])->getReg()) &&
+ isARMLowRegister(static_cast<ARMOperand*>(Operands[4])->getReg()) &&
+ static_cast<ARMOperand*>(Operands[5])->isImm0_7())
+ return false;
+
+ // Otherwise, we use encoding T4, which does not have a cc_out
+ // operand.
+ return true;
+ }
+
+ // The thumb2 multiply instruction doesn't have a CCOut register, so
+ // if we have a "mul" mnemonic in Thumb mode, check if we'll be able to
+ // use the 16-bit encoding or not.
+ if (isThumbTwo() && Mnemonic == "mul" && Operands.size() == 6 &&
+ static_cast<ARMOperand*>(Operands[1])->getReg() == 0 &&
+ static_cast<ARMOperand*>(Operands[3])->isReg() &&
+ static_cast<ARMOperand*>(Operands[4])->isReg() &&
+ static_cast<ARMOperand*>(Operands[5])->isReg() &&
+ // If the registers aren't low regs, the destination reg isn't the
+ // same as one of the source regs, or the cc_out operand is zero
+ // outside of an IT block, we have to use the 32-bit encoding, so
+ // remove the cc_out operand.
+ (!isARMLowRegister(static_cast<ARMOperand*>(Operands[3])->getReg()) ||
+ !isARMLowRegister(static_cast<ARMOperand*>(Operands[4])->getReg()) ||
+ !inITBlock() ||
+ (static_cast<ARMOperand*>(Operands[3])->getReg() !=
+ static_cast<ARMOperand*>(Operands[5])->getReg() &&
+ static_cast<ARMOperand*>(Operands[3])->getReg() !=
+ static_cast<ARMOperand*>(Operands[4])->getReg())))
+ return true;
+
+
+
+ // Register-register 'add/sub' for thumb does not have a cc_out operand
+ // when it's an ADD/SUB SP, #imm. Be lenient on count since there's also
+ // the "add/sub SP, SP, #imm" version. If the follow-up operands aren't
+ // right, this will result in better diagnostics (which operand is off)
+ // anyway.
+ if (isThumb() && (Mnemonic == "add" || Mnemonic == "sub") &&
+ (Operands.size() == 5 || Operands.size() == 6) &&
+ static_cast<ARMOperand*>(Operands[3])->isReg() &&
+ static_cast<ARMOperand*>(Operands[3])->getReg() == ARM::SP &&
+ static_cast<ARMOperand*>(Operands[1])->getReg() == 0)
+ return true;
+
+ return false;
}
/// Parse an arm instruction mnemonic followed by its operands.
@@ -2050,16 +3755,51 @@ bool ARMAsmParser::ParseInstruction(StringRef Name, SMLoc NameLoc,
SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
// Create the leading tokens for the mnemonic, split by '.' characters.
size_t Start = 0, Next = Name.find('.');
- StringRef Head = Name.slice(Start, Next);
+ StringRef Mnemonic = Name.slice(Start, Next);
// Split out the predication code and carry setting flag from the mnemonic.
unsigned PredicationCode;
unsigned ProcessorIMod;
bool CarrySetting;
- Head = SplitMnemonic(Head, PredicationCode, CarrySetting,
- ProcessorIMod);
+ StringRef ITMask;
+ Mnemonic = splitMnemonic(Mnemonic, PredicationCode, CarrySetting,
+ ProcessorIMod, ITMask);
+
+ // In Thumb1, only the branch (B) instruction can be predicated.
+ if (isThumbOne() && PredicationCode != ARMCC::AL && Mnemonic != "b") {
+ Parser.EatToEndOfStatement();
+ return Error(NameLoc, "conditional execution not supported in Thumb1");
+ }
+
+ Operands.push_back(ARMOperand::CreateToken(Mnemonic, NameLoc));
- Operands.push_back(ARMOperand::CreateToken(Head, NameLoc));
+ // Handle the IT instruction ITMask. Convert it to a bitmask. This
+ // is the mask as it will be for the IT encoding if the conditional
+ // encoding has a '1' as it's bit0 (i.e. 't' ==> '1'). In the case
+ // where the conditional bit0 is zero, the instruction post-processing
+ // will adjust the mask accordingly.
+ if (Mnemonic == "it") {
+ SMLoc Loc = SMLoc::getFromPointer(NameLoc.getPointer() + 2);
+ if (ITMask.size() > 3) {
+ Parser.EatToEndOfStatement();
+ return Error(Loc, "too many conditions on IT instruction");
+ }
+ unsigned Mask = 8;
+ for (unsigned i = ITMask.size(); i != 0; --i) {
+ char pos = ITMask[i - 1];
+ if (pos != 't' && pos != 'e') {
+ Parser.EatToEndOfStatement();
+ return Error(Loc, "illegal IT block condition mask '" + ITMask + "'");
+ }
+ Mask >>= 1;
+ if (ITMask[i - 1] == 't')
+ Mask |= 8;
+ }
+ Operands.push_back(ARMOperand::CreateITMask(Mask, Loc));
+ }
+
+ // FIXME: This is all a pretty gross hack. We should automatically handle
+ // optional operands like this via tblgen.
// Next, add the CCOut and ConditionCode operands, if needed.
//
@@ -2069,34 +3809,36 @@ bool ARMAsmParser::ParseInstruction(StringRef Name, SMLoc NameLoc,
// the matcher deal with finding the right instruction or generating an
// appropriate error.
bool CanAcceptCarrySet, CanAcceptPredicationCode;
- GetMnemonicAcceptInfo(Head, CanAcceptCarrySet, CanAcceptPredicationCode);
+ getMnemonicAcceptInfo(Mnemonic, CanAcceptCarrySet, CanAcceptPredicationCode);
// If we had a carry-set on an instruction that can't do that, issue an
// error.
if (!CanAcceptCarrySet && CarrySetting) {
Parser.EatToEndOfStatement();
- return Error(NameLoc, "instruction '" + Head +
+ return Error(NameLoc, "instruction '" + Mnemonic +
"' can not set flags, but 's' suffix specified");
}
+ // If we had a predication code on an instruction that can't do that, issue an
+ // error.
+ if (!CanAcceptPredicationCode && PredicationCode != ARMCC::AL) {
+ Parser.EatToEndOfStatement();
+ return Error(NameLoc, "instruction '" + Mnemonic +
+ "' is not predicable, but condition code specified");
+ }
// Add the carry setting operand, if necessary.
- //
- // FIXME: It would be awesome if we could somehow invent a location such that
- // match errors on this operand would print a nice diagnostic about how the
- // 's' character in the mnemonic resulted in a CCOut operand.
- if (CanAcceptCarrySet)
+ if (CanAcceptCarrySet) {
+ SMLoc Loc = SMLoc::getFromPointer(NameLoc.getPointer() + Mnemonic.size());
Operands.push_back(ARMOperand::CreateCCOut(CarrySetting ? ARM::CPSR : 0,
- NameLoc));
+ Loc));
+ }
// Add the predication code operand, if necessary.
if (CanAcceptPredicationCode) {
+ SMLoc Loc = SMLoc::getFromPointer(NameLoc.getPointer() + Mnemonic.size() +
+ CarrySetting);
Operands.push_back(ARMOperand::CreateCondCode(
- ARMCC::CondCodes(PredicationCode), NameLoc));
- } else {
- // This mnemonic can't ever accept a predication code, but the user wrote
- // one (or misspelled another mnemonic).
-
- // FIXME: Issue a nice error.
+ ARMCC::CondCodes(PredicationCode), Loc));
}
// Add the processor imod operand, if necessary.
@@ -2104,11 +3846,6 @@ bool ARMAsmParser::ParseInstruction(StringRef Name, SMLoc NameLoc,
Operands.push_back(ARMOperand::CreateImm(
MCConstantExpr::Create(ProcessorIMod, getContext()),
NameLoc, NameLoc));
- } else {
- // This mnemonic can't ever accept a imod, but the user wrote
- // one (or misspelled another mnemonic).
-
- // FIXME: Issue a nice error.
}
// Add the remaining tokens in the mnemonic.
@@ -2117,13 +3854,19 @@ bool ARMAsmParser::ParseInstruction(StringRef Name, SMLoc NameLoc,
Next = Name.find('.', Start + 1);
StringRef ExtraToken = Name.slice(Start, Next);
- Operands.push_back(ARMOperand::CreateToken(ExtraToken, NameLoc));
+ // For now, we're only parsing Thumb1 (for the most part), so
+ // just ignore ".n" qualifiers. We'll use them to restrict
+ // matching when we do Thumb2.
+ if (ExtraToken != ".n") {
+ SMLoc Loc = SMLoc::getFromPointer(NameLoc.getPointer() + Start);
+ Operands.push_back(ARMOperand::CreateToken(ExtraToken, Loc));
+ }
}
// Read the remaining operands.
if (getLexer().isNot(AsmToken::EndOfStatement)) {
// Read the first operand.
- if (ParseOperand(Operands, Head)) {
+ if (parseOperand(Operands, Mnemonic)) {
Parser.EatToEndOfStatement();
return true;
}
@@ -2132,7 +3875,7 @@ bool ARMAsmParser::ParseInstruction(StringRef Name, SMLoc NameLoc,
Parser.Lex(); // Eat the comma.
// Parse and remember the operand.
- if (ParseOperand(Operands, Head)) {
+ if (parseOperand(Operands, Mnemonic)) {
Parser.EatToEndOfStatement();
return true;
}
@@ -2140,75 +3883,548 @@ bool ARMAsmParser::ParseInstruction(StringRef Name, SMLoc NameLoc,
}
if (getLexer().isNot(AsmToken::EndOfStatement)) {
+ SMLoc Loc = getLexer().getLoc();
Parser.EatToEndOfStatement();
- return TokError("unexpected token in argument list");
+ return Error(Loc, "unexpected token in argument list");
}
Parser.Lex(); // Consume the EndOfStatement
+
+ // Some instructions, mostly Thumb, have forms for the same mnemonic that
+ // do and don't have a cc_out optional-def operand. With some spot-checks
+ // of the operand list, we can figure out which variant we're trying to
+ // parse and adjust accordingly before actually matching. We shouldn't ever
+ // try to remove a cc_out operand that was explicitly set on the the
+ // mnemonic, of course (CarrySetting == true). Reason number #317 the
+ // table driven matcher doesn't fit well with the ARM instruction set.
+ if (!CarrySetting && shouldOmitCCOutOperand(Mnemonic, Operands)) {
+ ARMOperand *Op = static_cast<ARMOperand*>(Operands[1]);
+ Operands.erase(Operands.begin() + 1);
+ delete Op;
+ }
+
+ // ARM mode 'blx' need special handling, as the register operand version
+ // is predicable, but the label operand version is not. So, we can't rely
+ // on the Mnemonic based checking to correctly figure out when to put
+ // a k_CondCode operand in the list. If we're trying to match the label
+ // version, remove the k_CondCode operand here.
+ if (!isThumb() && Mnemonic == "blx" && Operands.size() == 3 &&
+ static_cast<ARMOperand*>(Operands[2])->isImm()) {
+ ARMOperand *Op = static_cast<ARMOperand*>(Operands[1]);
+ Operands.erase(Operands.begin() + 1);
+ delete Op;
+ }
+
+ // The vector-compare-to-zero instructions have a literal token "#0" at
+ // the end that comes to here as an immediate operand. Convert it to a
+ // token to play nicely with the matcher.
+ if ((Mnemonic == "vceq" || Mnemonic == "vcge" || Mnemonic == "vcgt" ||
+ Mnemonic == "vcle" || Mnemonic == "vclt") && Operands.size() == 6 &&
+ static_cast<ARMOperand*>(Operands[5])->isImm()) {
+ ARMOperand *Op = static_cast<ARMOperand*>(Operands[5]);
+ const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(Op->getImm());
+ if (CE && CE->getValue() == 0) {
+ Operands.erase(Operands.begin() + 5);
+ Operands.push_back(ARMOperand::CreateToken("#0", Op->getStartLoc()));
+ delete Op;
+ }
+ }
+ // VCMP{E} does the same thing, but with a different operand count.
+ if ((Mnemonic == "vcmp" || Mnemonic == "vcmpe") && Operands.size() == 5 &&
+ static_cast<ARMOperand*>(Operands[4])->isImm()) {
+ ARMOperand *Op = static_cast<ARMOperand*>(Operands[4]);
+ const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(Op->getImm());
+ if (CE && CE->getValue() == 0) {
+ Operands.erase(Operands.begin() + 4);
+ Operands.push_back(ARMOperand::CreateToken("#0", Op->getStartLoc()));
+ delete Op;
+ }
+ }
+ // Similarly, the Thumb1 "RSB" instruction has a literal "#0" on the
+ // end. Convert it to a token here.
+ if (Mnemonic == "rsb" && isThumb() && Operands.size() == 6 &&
+ static_cast<ARMOperand*>(Operands[5])->isImm()) {
+ ARMOperand *Op = static_cast<ARMOperand*>(Operands[5]);
+ const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(Op->getImm());
+ if (CE && CE->getValue() == 0) {
+ Operands.erase(Operands.begin() + 5);
+ Operands.push_back(ARMOperand::CreateToken("#0", Op->getStartLoc()));
+ delete Op;
+ }
+ }
+
+ return false;
+}
+
+// Validate context-sensitive operand constraints.
+
+// return 'true' if register list contains non-low GPR registers,
+// 'false' otherwise. If Reg is in the register list or is HiReg, set
+// 'containsReg' to true.
+static bool checkLowRegisterList(MCInst Inst, unsigned OpNo, unsigned Reg,
+ unsigned HiReg, bool &containsReg) {
+ containsReg = false;
+ for (unsigned i = OpNo; i < Inst.getNumOperands(); ++i) {
+ unsigned OpReg = Inst.getOperand(i).getReg();
+ if (OpReg == Reg)
+ containsReg = true;
+ // Anything other than a low register isn't legal here.
+ if (!isARMLowRegister(OpReg) && (!HiReg || OpReg != HiReg))
+ return true;
+ }
+ return false;
+}
+
+// Check if the specified regisgter is in the register list of the inst,
+// starting at the indicated operand number.
+static bool listContainsReg(MCInst &Inst, unsigned OpNo, unsigned Reg) {
+ for (unsigned i = OpNo; i < Inst.getNumOperands(); ++i) {
+ unsigned OpReg = Inst.getOperand(i).getReg();
+ if (OpReg == Reg)
+ return true;
+ }
+ return false;
+}
+
+// FIXME: We would really prefer to have MCInstrInfo (the wrapper around
+// the ARMInsts array) instead. Getting that here requires awkward
+// API changes, though. Better way?
+namespace llvm {
+extern MCInstrDesc ARMInsts[];
+}
+static MCInstrDesc &getInstDesc(unsigned Opcode) {
+ return ARMInsts[Opcode];
+}
+
+// FIXME: We would really like to be able to tablegen'erate this.
+bool ARMAsmParser::
+validateInstruction(MCInst &Inst,
+ const SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
+ MCInstrDesc &MCID = getInstDesc(Inst.getOpcode());
+ SMLoc Loc = Operands[0]->getStartLoc();
+ // Check the IT block state first.
+ // NOTE: In Thumb mode, the BKPT instruction has the interesting property of
+ // being allowed in IT blocks, but not being predicable. It just always
+ // executes.
+ if (inITBlock() && Inst.getOpcode() != ARM::tBKPT) {
+ unsigned bit = 1;
+ if (ITState.FirstCond)
+ ITState.FirstCond = false;
+ else
+ bit = (ITState.Mask >> (5 - ITState.CurPosition)) & 1;
+ // The instruction must be predicable.
+ if (!MCID.isPredicable())
+ return Error(Loc, "instructions in IT block must be predicable");
+ unsigned Cond = Inst.getOperand(MCID.findFirstPredOperandIdx()).getImm();
+ unsigned ITCond = bit ? ITState.Cond :
+ ARMCC::getOppositeCondition(ITState.Cond);
+ if (Cond != ITCond) {
+ // Find the condition code Operand to get its SMLoc information.
+ SMLoc CondLoc;
+ for (unsigned i = 1; i < Operands.size(); ++i)
+ if (static_cast<ARMOperand*>(Operands[i])->isCondCode())
+ CondLoc = Operands[i]->getStartLoc();
+ return Error(CondLoc, "incorrect condition in IT block; got '" +
+ StringRef(ARMCondCodeToString(ARMCC::CondCodes(Cond))) +
+ "', but expected '" +
+ ARMCondCodeToString(ARMCC::CondCodes(ITCond)) + "'");
+ }
+ // Check for non-'al' condition codes outside of the IT block.
+ } else if (isThumbTwo() && MCID.isPredicable() &&
+ Inst.getOperand(MCID.findFirstPredOperandIdx()).getImm() !=
+ ARMCC::AL && Inst.getOpcode() != ARM::tB &&
+ Inst.getOpcode() != ARM::t2B)
+ return Error(Loc, "predicated instructions must be in IT block");
+
+ switch (Inst.getOpcode()) {
+ case ARM::LDRD:
+ case ARM::LDRD_PRE:
+ case ARM::LDRD_POST:
+ case ARM::LDREXD: {
+ // Rt2 must be Rt + 1.
+ unsigned Rt = getARMRegisterNumbering(Inst.getOperand(0).getReg());
+ unsigned Rt2 = getARMRegisterNumbering(Inst.getOperand(1).getReg());
+ if (Rt2 != Rt + 1)
+ return Error(Operands[3]->getStartLoc(),
+ "destination operands must be sequential");
+ return false;
+ }
+ case ARM::STRD: {
+ // Rt2 must be Rt + 1.
+ unsigned Rt = getARMRegisterNumbering(Inst.getOperand(0).getReg());
+ unsigned Rt2 = getARMRegisterNumbering(Inst.getOperand(1).getReg());
+ if (Rt2 != Rt + 1)
+ return Error(Operands[3]->getStartLoc(),
+ "source operands must be sequential");
+ return false;
+ }
+ case ARM::STRD_PRE:
+ case ARM::STRD_POST:
+ case ARM::STREXD: {
+ // Rt2 must be Rt + 1.
+ unsigned Rt = getARMRegisterNumbering(Inst.getOperand(1).getReg());
+ unsigned Rt2 = getARMRegisterNumbering(Inst.getOperand(2).getReg());
+ if (Rt2 != Rt + 1)
+ return Error(Operands[3]->getStartLoc(),
+ "source operands must be sequential");
+ return false;
+ }
+ case ARM::SBFX:
+ case ARM::UBFX: {
+ // width must be in range [1, 32-lsb]
+ unsigned lsb = Inst.getOperand(2).getImm();
+ unsigned widthm1 = Inst.getOperand(3).getImm();
+ if (widthm1 >= 32 - lsb)
+ return Error(Operands[5]->getStartLoc(),
+ "bitfield width must be in range [1,32-lsb]");
+ return false;
+ }
+ case ARM::tLDMIA: {
+ // If we're parsing Thumb2, the .w variant is available and handles
+ // most cases that are normally illegal for a Thumb1 LDM
+ // instruction. We'll make the transformation in processInstruction()
+ // if necessary.
+ //
+ // Thumb LDM instructions are writeback iff the base register is not
+ // in the register list.
+ unsigned Rn = Inst.getOperand(0).getReg();
+ bool hasWritebackToken =
+ (static_cast<ARMOperand*>(Operands[3])->isToken() &&
+ static_cast<ARMOperand*>(Operands[3])->getToken() == "!");
+ bool listContainsBase;
+ if (checkLowRegisterList(Inst, 3, Rn, 0, listContainsBase) && !isThumbTwo())
+ return Error(Operands[3 + hasWritebackToken]->getStartLoc(),
+ "registers must be in range r0-r7");
+ // If we should have writeback, then there should be a '!' token.
+ if (!listContainsBase && !hasWritebackToken && !isThumbTwo())
+ return Error(Operands[2]->getStartLoc(),
+ "writeback operator '!' expected");
+ // If we should not have writeback, there must not be a '!'. This is
+ // true even for the 32-bit wide encodings.
+ if (listContainsBase && hasWritebackToken)
+ return Error(Operands[3]->getStartLoc(),
+ "writeback operator '!' not allowed when base register "
+ "in register list");
+
+ break;
+ }
+ case ARM::t2LDMIA_UPD: {
+ if (listContainsReg(Inst, 3, Inst.getOperand(0).getReg()))
+ return Error(Operands[4]->getStartLoc(),
+ "writeback operator '!' not allowed when base register "
+ "in register list");
+ break;
+ }
+ case ARM::tPOP: {
+ bool listContainsBase;
+ if (checkLowRegisterList(Inst, 3, 0, ARM::PC, listContainsBase))
+ return Error(Operands[2]->getStartLoc(),
+ "registers must be in range r0-r7 or pc");
+ break;
+ }
+ case ARM::tPUSH: {
+ bool listContainsBase;
+ if (checkLowRegisterList(Inst, 3, 0, ARM::LR, listContainsBase))
+ return Error(Operands[2]->getStartLoc(),
+ "registers must be in range r0-r7 or lr");
+ break;
+ }
+ case ARM::tSTMIA_UPD: {
+ bool listContainsBase;
+ if (checkLowRegisterList(Inst, 4, 0, 0, listContainsBase) && !isThumbTwo())
+ return Error(Operands[4]->getStartLoc(),
+ "registers must be in range r0-r7");
+ break;
+ }
+ }
+
return false;
}
+void ARMAsmParser::
+processInstruction(MCInst &Inst,
+ const SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
+ switch (Inst.getOpcode()) {
+ case ARM::LDMIA_UPD:
+ // If this is a load of a single register via a 'pop', then we should use
+ // a post-indexed LDR instruction instead, per the ARM ARM.
+ if (static_cast<ARMOperand*>(Operands[0])->getToken() == "pop" &&
+ Inst.getNumOperands() == 5) {
+ MCInst TmpInst;
+ TmpInst.setOpcode(ARM::LDR_POST_IMM);
+ TmpInst.addOperand(Inst.getOperand(4)); // Rt
+ TmpInst.addOperand(Inst.getOperand(0)); // Rn_wb
+ TmpInst.addOperand(Inst.getOperand(1)); // Rn
+ TmpInst.addOperand(MCOperand::CreateReg(0)); // am2offset
+ TmpInst.addOperand(MCOperand::CreateImm(4));
+ TmpInst.addOperand(Inst.getOperand(2)); // CondCode
+ TmpInst.addOperand(Inst.getOperand(3));
+ Inst = TmpInst;
+ }
+ break;
+ case ARM::STMDB_UPD:
+ // If this is a store of a single register via a 'push', then we should use
+ // a pre-indexed STR instruction instead, per the ARM ARM.
+ if (static_cast<ARMOperand*>(Operands[0])->getToken() == "push" &&
+ Inst.getNumOperands() == 5) {
+ MCInst TmpInst;
+ TmpInst.setOpcode(ARM::STR_PRE_IMM);
+ TmpInst.addOperand(Inst.getOperand(0)); // Rn_wb
+ TmpInst.addOperand(Inst.getOperand(4)); // Rt
+ TmpInst.addOperand(Inst.getOperand(1)); // addrmode_imm12
+ TmpInst.addOperand(MCOperand::CreateImm(-4));
+ TmpInst.addOperand(Inst.getOperand(2)); // CondCode
+ TmpInst.addOperand(Inst.getOperand(3));
+ Inst = TmpInst;
+ }
+ break;
+ case ARM::tADDi8:
+ // If the immediate is in the range 0-7, we want tADDi3 iff Rd was
+ // explicitly specified. From the ARM ARM: "Encoding T1 is preferred
+ // to encoding T2 if <Rd> is specified and encoding T2 is preferred
+ // to encoding T1 if <Rd> is omitted."
+ if (Inst.getOperand(3).getImm() < 8 && Operands.size() == 6)
+ Inst.setOpcode(ARM::tADDi3);
+ break;
+ case ARM::tSUBi8:
+ // If the immediate is in the range 0-7, we want tADDi3 iff Rd was
+ // explicitly specified. From the ARM ARM: "Encoding T1 is preferred
+ // to encoding T2 if <Rd> is specified and encoding T2 is preferred
+ // to encoding T1 if <Rd> is omitted."
+ if (Inst.getOperand(3).getImm() < 8 && Operands.size() == 6)
+ Inst.setOpcode(ARM::tSUBi3);
+ break;
+ case ARM::tB:
+ // A Thumb conditional branch outside of an IT block is a tBcc.
+ if (Inst.getOperand(1).getImm() != ARMCC::AL && !inITBlock())
+ Inst.setOpcode(ARM::tBcc);
+ break;
+ case ARM::t2B:
+ // A Thumb2 conditional branch outside of an IT block is a t2Bcc.
+ if (Inst.getOperand(1).getImm() != ARMCC::AL && !inITBlock())
+ Inst.setOpcode(ARM::t2Bcc);
+ break;
+ case ARM::t2Bcc:
+ // If the conditional is AL or we're in an IT block, we really want t2B.
+ if (Inst.getOperand(1).getImm() == ARMCC::AL || inITBlock())
+ Inst.setOpcode(ARM::t2B);
+ break;
+ case ARM::tBcc:
+ // If the conditional is AL, we really want tB.
+ if (Inst.getOperand(1).getImm() == ARMCC::AL)
+ Inst.setOpcode(ARM::tB);
+ break;
+ case ARM::tLDMIA: {
+ // If the register list contains any high registers, or if the writeback
+ // doesn't match what tLDMIA can do, we need to use the 32-bit encoding
+ // instead if we're in Thumb2. Otherwise, this should have generated
+ // an error in validateInstruction().
+ unsigned Rn = Inst.getOperand(0).getReg();
+ bool hasWritebackToken =
+ (static_cast<ARMOperand*>(Operands[3])->isToken() &&
+ static_cast<ARMOperand*>(Operands[3])->getToken() == "!");
+ bool listContainsBase;
+ if (checkLowRegisterList(Inst, 3, Rn, 0, listContainsBase) ||
+ (!listContainsBase && !hasWritebackToken) ||
+ (listContainsBase && hasWritebackToken)) {
+ // 16-bit encoding isn't sufficient. Switch to the 32-bit version.
+ assert (isThumbTwo());
+ Inst.setOpcode(hasWritebackToken ? ARM::t2LDMIA_UPD : ARM::t2LDMIA);
+ // If we're switching to the updating version, we need to insert
+ // the writeback tied operand.
+ if (hasWritebackToken)
+ Inst.insert(Inst.begin(),
+ MCOperand::CreateReg(Inst.getOperand(0).getReg()));
+ }
+ break;
+ }
+ case ARM::tSTMIA_UPD: {
+ // If the register list contains any high registers, we need to use
+ // the 32-bit encoding instead if we're in Thumb2. Otherwise, this
+ // should have generated an error in validateInstruction().
+ unsigned Rn = Inst.getOperand(0).getReg();
+ bool listContainsBase;
+ if (checkLowRegisterList(Inst, 4, Rn, 0, listContainsBase)) {
+ // 16-bit encoding isn't sufficient. Switch to the 32-bit version.
+ assert (isThumbTwo());
+ Inst.setOpcode(ARM::t2STMIA_UPD);
+ }
+ break;
+ }
+ case ARM::t2MOVi: {
+ // If we can use the 16-bit encoding and the user didn't explicitly
+ // request the 32-bit variant, transform it here.
+ if (isARMLowRegister(Inst.getOperand(0).getReg()) &&
+ Inst.getOperand(1).getImm() <= 255 &&
+ ((!inITBlock() && Inst.getOperand(2).getImm() == ARMCC::AL &&
+ Inst.getOperand(4).getReg() == ARM::CPSR) ||
+ (inITBlock() && Inst.getOperand(4).getReg() == 0)) &&
+ (!static_cast<ARMOperand*>(Operands[2])->isToken() ||
+ static_cast<ARMOperand*>(Operands[2])->getToken() != ".w")) {
+ // The operands aren't in the same order for tMOVi8...
+ MCInst TmpInst;
+ TmpInst.setOpcode(ARM::tMOVi8);
+ TmpInst.addOperand(Inst.getOperand(0));
+ TmpInst.addOperand(Inst.getOperand(4));
+ TmpInst.addOperand(Inst.getOperand(1));
+ TmpInst.addOperand(Inst.getOperand(2));
+ TmpInst.addOperand(Inst.getOperand(3));
+ Inst = TmpInst;
+ }
+ break;
+ }
+ case ARM::t2MOVr: {
+ // If we can use the 16-bit encoding and the user didn't explicitly
+ // request the 32-bit variant, transform it here.
+ if (isARMLowRegister(Inst.getOperand(0).getReg()) &&
+ isARMLowRegister(Inst.getOperand(1).getReg()) &&
+ Inst.getOperand(2).getImm() == ARMCC::AL &&
+ Inst.getOperand(4).getReg() == ARM::CPSR &&
+ (!static_cast<ARMOperand*>(Operands[2])->isToken() ||
+ static_cast<ARMOperand*>(Operands[2])->getToken() != ".w")) {
+ // The operands aren't the same for tMOV[S]r... (no cc_out)
+ MCInst TmpInst;
+ TmpInst.setOpcode(Inst.getOperand(4).getReg() ? ARM::tMOVSr : ARM::tMOVr);
+ TmpInst.addOperand(Inst.getOperand(0));
+ TmpInst.addOperand(Inst.getOperand(1));
+ TmpInst.addOperand(Inst.getOperand(2));
+ TmpInst.addOperand(Inst.getOperand(3));
+ Inst = TmpInst;
+ }
+ break;
+ }
+ case ARM::t2SXTH:
+ case ARM::t2SXTB:
+ case ARM::t2UXTH:
+ case ARM::t2UXTB: {
+ // If we can use the 16-bit encoding and the user didn't explicitly
+ // request the 32-bit variant, transform it here.
+ if (isARMLowRegister(Inst.getOperand(0).getReg()) &&
+ isARMLowRegister(Inst.getOperand(1).getReg()) &&
+ Inst.getOperand(2).getImm() == 0 &&
+ (!static_cast<ARMOperand*>(Operands[2])->isToken() ||
+ static_cast<ARMOperand*>(Operands[2])->getToken() != ".w")) {
+ unsigned NewOpc;
+ switch (Inst.getOpcode()) {
+ default: llvm_unreachable("Illegal opcode!");
+ case ARM::t2SXTH: NewOpc = ARM::tSXTH; break;
+ case ARM::t2SXTB: NewOpc = ARM::tSXTB; break;
+ case ARM::t2UXTH: NewOpc = ARM::tUXTH; break;
+ case ARM::t2UXTB: NewOpc = ARM::tUXTB; break;
+ }
+ // The operands aren't the same for thumb1 (no rotate operand).
+ MCInst TmpInst;
+ TmpInst.setOpcode(NewOpc);
+ TmpInst.addOperand(Inst.getOperand(0));
+ TmpInst.addOperand(Inst.getOperand(1));
+ TmpInst.addOperand(Inst.getOperand(3));
+ TmpInst.addOperand(Inst.getOperand(4));
+ Inst = TmpInst;
+ }
+ break;
+ }
+ case ARM::t2IT: {
+ // The mask bits for all but the first condition are represented as
+ // the low bit of the condition code value implies 't'. We currently
+ // always have 1 implies 't', so XOR toggle the bits if the low bit
+ // of the condition code is zero. The encoding also expects the low
+ // bit of the condition to be encoded as bit 4 of the mask operand,
+ // so mask that in if needed
+ MCOperand &MO = Inst.getOperand(1);
+ unsigned Mask = MO.getImm();
+ unsigned OrigMask = Mask;
+ unsigned TZ = CountTrailingZeros_32(Mask);
+ if ((Inst.getOperand(0).getImm() & 1) == 0) {
+ assert(Mask && TZ <= 3 && "illegal IT mask value!");
+ for (unsigned i = 3; i != TZ; --i)
+ Mask ^= 1 << i;
+ } else
+ Mask |= 0x10;
+ MO.setImm(Mask);
+
+ // Set up the IT block state according to the IT instruction we just
+ // matched.
+ assert(!inITBlock() && "nested IT blocks?!");
+ ITState.Cond = ARMCC::CondCodes(Inst.getOperand(0).getImm());
+ ITState.Mask = OrigMask; // Use the original mask, not the updated one.
+ ITState.CurPosition = 0;
+ ITState.FirstCond = true;
+ break;
+ }
+ }
+}
+
+unsigned ARMAsmParser::checkTargetMatchPredicate(MCInst &Inst) {
+ // 16-bit thumb arithmetic instructions either require or preclude the 'S'
+ // suffix depending on whether they're in an IT block or not.
+ unsigned Opc = Inst.getOpcode();
+ MCInstrDesc &MCID = getInstDesc(Opc);
+ if (MCID.TSFlags & ARMII::ThumbArithFlagSetting) {
+ assert(MCID.hasOptionalDef() &&
+ "optionally flag setting instruction missing optional def operand");
+ assert(MCID.NumOperands == Inst.getNumOperands() &&
+ "operand count mismatch!");
+ // Find the optional-def operand (cc_out).
+ unsigned OpNo;
+ for (OpNo = 0;
+ !MCID.OpInfo[OpNo].isOptionalDef() && OpNo < MCID.NumOperands;
+ ++OpNo)
+ ;
+ // If we're parsing Thumb1, reject it completely.
+ if (isThumbOne() && Inst.getOperand(OpNo).getReg() != ARM::CPSR)
+ return Match_MnemonicFail;
+ // If we're parsing Thumb2, which form is legal depends on whether we're
+ // in an IT block.
+ if (isThumbTwo() && Inst.getOperand(OpNo).getReg() != ARM::CPSR &&
+ !inITBlock())
+ return Match_RequiresITBlock;
+ if (isThumbTwo() && Inst.getOperand(OpNo).getReg() == ARM::CPSR &&
+ inITBlock())
+ return Match_RequiresNotITBlock;
+ }
+ // Some high-register supporting Thumb1 encodings only allow both registers
+ // to be from r0-r7 when in Thumb2.
+ else if (Opc == ARM::tADDhirr && isThumbOne() &&
+ isARMLowRegister(Inst.getOperand(1).getReg()) &&
+ isARMLowRegister(Inst.getOperand(2).getReg()))
+ return Match_RequiresThumb2;
+ // Others only require ARMv6 or later.
+ else if (Opc == ARM::tMOVr && isThumbOne() && !hasV6Ops() &&
+ isARMLowRegister(Inst.getOperand(0).getReg()) &&
+ isARMLowRegister(Inst.getOperand(1).getReg()))
+ return Match_RequiresV6;
+ return Match_Success;
+}
+
bool ARMAsmParser::
MatchAndEmitInstruction(SMLoc IDLoc,
SmallVectorImpl<MCParsedAsmOperand*> &Operands,
MCStreamer &Out) {
MCInst Inst;
unsigned ErrorInfo;
- MatchResultTy MatchResult, MatchResult2;
+ unsigned MatchResult;
MatchResult = MatchInstructionImpl(Operands, Inst, ErrorInfo);
- if (MatchResult != Match_Success) {
- // If we get a Match_InvalidOperand it might be some arithmetic instruction
- // that does not update the condition codes. So try adding a CCOut operand
- // with a value of reg0.
- if (MatchResult == Match_InvalidOperand) {
- Operands.insert(Operands.begin() + 1,
- ARMOperand::CreateCCOut(0,
- ((ARMOperand*)Operands[0])->getStartLoc()));
- MatchResult2 = MatchInstructionImpl(Operands, Inst, ErrorInfo);
- if (MatchResult2 == Match_Success)
- MatchResult = Match_Success;
- else {
- ARMOperand *CCOut = ((ARMOperand*)Operands[1]);
- Operands.erase(Operands.begin() + 1);
- delete CCOut;
- }
- }
- // If we get a Match_MnemonicFail it might be some arithmetic instruction
- // that updates the condition codes if it ends in 's'. So see if the
- // mnemonic ends in 's' and if so try removing the 's' and adding a CCOut
- // operand with a value of CPSR.
- else if (MatchResult == Match_MnemonicFail) {
- // Get the instruction mnemonic, which is the first token.
- StringRef Mnemonic = ((ARMOperand*)Operands[0])->getToken();
- if (Mnemonic.substr(Mnemonic.size()-1) == "s") {
- // removed the 's' from the mnemonic for matching.
- StringRef MnemonicNoS = Mnemonic.slice(0, Mnemonic.size() - 1);
- SMLoc NameLoc = ((ARMOperand*)Operands[0])->getStartLoc();
- ARMOperand *OldMnemonic = ((ARMOperand*)Operands[0]);
- Operands.erase(Operands.begin());
- delete OldMnemonic;
- Operands.insert(Operands.begin(),
- ARMOperand::CreateToken(MnemonicNoS, NameLoc));
- Operands.insert(Operands.begin() + 1,
- ARMOperand::CreateCCOut(ARM::CPSR, NameLoc));
- MatchResult2 = MatchInstructionImpl(Operands, Inst, ErrorInfo);
- if (MatchResult2 == Match_Success)
- MatchResult = Match_Success;
- else {
- ARMOperand *OldMnemonic = ((ARMOperand*)Operands[0]);
- Operands.erase(Operands.begin());
- delete OldMnemonic;
- Operands.insert(Operands.begin(),
- ARMOperand::CreateToken(Mnemonic, NameLoc));
- ARMOperand *CCOut = ((ARMOperand*)Operands[1]);
- Operands.erase(Operands.begin() + 1);
- delete CCOut;
- }
- }
- }
- }
switch (MatchResult) {
+ default: break;
case Match_Success:
+ // Context sensitive operand constraints aren't handled by the matcher,
+ // so check them here.
+ if (validateInstruction(Inst, Operands)) {
+ // Still progress the IT block, otherwise one wrong condition causes
+ // nasty cascading errors.
+ forwardITPosition();
+ return true;
+ }
+
+ // Some instructions need post-processing to, for example, tweak which
+ // encoding is selected.
+ processInstruction(Inst, Operands);
+
+ // Only move forward at the very end so that everything in validate
+ // and process gets a consistent answer about whether we're in an IT
+ // block.
+ forwardITPosition();
+
Out.EmitInstruction(Inst);
return false;
case Match_MissingFeature:
@@ -2227,34 +4443,43 @@ MatchAndEmitInstruction(SMLoc IDLoc,
return Error(ErrorLoc, "invalid operand for instruction");
}
case Match_MnemonicFail:
- return Error(IDLoc, "unrecognized instruction mnemonic");
+ return Error(IDLoc, "invalid instruction");
case Match_ConversionFail:
- return Error(IDLoc, "unable to convert operands to instruction");
+ // The converter function will have already emited a diagnostic.
+ return true;
+ case Match_RequiresNotITBlock:
+ return Error(IDLoc, "flag setting instruction only valid outside IT block");
+ case Match_RequiresITBlock:
+ return Error(IDLoc, "instruction only valid inside IT block");
+ case Match_RequiresV6:
+ return Error(IDLoc, "instruction variant requires ARMv6 or later");
+ case Match_RequiresThumb2:
+ return Error(IDLoc, "instruction variant requires Thumb2");
}
llvm_unreachable("Implement any new match types added!");
return true;
}
-/// ParseDirective parses the arm specific directives
+/// parseDirective parses the arm specific directives
bool ARMAsmParser::ParseDirective(AsmToken DirectiveID) {
StringRef IDVal = DirectiveID.getIdentifier();
if (IDVal == ".word")
- return ParseDirectiveWord(4, DirectiveID.getLoc());
+ return parseDirectiveWord(4, DirectiveID.getLoc());
else if (IDVal == ".thumb")
- return ParseDirectiveThumb(DirectiveID.getLoc());
+ return parseDirectiveThumb(DirectiveID.getLoc());
else if (IDVal == ".thumb_func")
- return ParseDirectiveThumbFunc(DirectiveID.getLoc());
+ return parseDirectiveThumbFunc(DirectiveID.getLoc());
else if (IDVal == ".code")
- return ParseDirectiveCode(DirectiveID.getLoc());
+ return parseDirectiveCode(DirectiveID.getLoc());
else if (IDVal == ".syntax")
- return ParseDirectiveSyntax(DirectiveID.getLoc());
+ return parseDirectiveSyntax(DirectiveID.getLoc());
return true;
}
-/// ParseDirectiveWord
+/// parseDirectiveWord
/// ::= .word [ expression (, expression)* ]
-bool ARMAsmParser::ParseDirectiveWord(unsigned Size, SMLoc L) {
+bool ARMAsmParser::parseDirectiveWord(unsigned Size, SMLoc L) {
if (getLexer().isNot(AsmToken::EndOfStatement)) {
for (;;) {
const MCExpr *Value;
@@ -2277,9 +4502,9 @@ bool ARMAsmParser::ParseDirectiveWord(unsigned Size, SMLoc L) {
return false;
}
-/// ParseDirectiveThumb
+/// parseDirectiveThumb
/// ::= .thumb
-bool ARMAsmParser::ParseDirectiveThumb(SMLoc L) {
+bool ARMAsmParser::parseDirectiveThumb(SMLoc L) {
if (getLexer().isNot(AsmToken::EndOfStatement))
return Error(L, "unexpected token in directive");
Parser.Lex();
@@ -2290,9 +4515,9 @@ bool ARMAsmParser::ParseDirectiveThumb(SMLoc L) {
return false;
}
-/// ParseDirectiveThumbFunc
+/// parseDirectiveThumbFunc
/// ::= .thumbfunc symbol_name
-bool ARMAsmParser::ParseDirectiveThumbFunc(SMLoc L) {
+bool ARMAsmParser::parseDirectiveThumbFunc(SMLoc L) {
const MCAsmInfo &MAI = getParser().getStreamer().getContext().getAsmInfo();
bool isMachO = MAI.hasSubsectionsViaSymbols();
StringRef Name;
@@ -2322,9 +4547,9 @@ bool ARMAsmParser::ParseDirectiveThumbFunc(SMLoc L) {
return false;
}
-/// ParseDirectiveSyntax
+/// parseDirectiveSyntax
/// ::= .syntax unified | divided
-bool ARMAsmParser::ParseDirectiveSyntax(SMLoc L) {
+bool ARMAsmParser::parseDirectiveSyntax(SMLoc L) {
const AsmToken &Tok = Parser.getTok();
if (Tok.isNot(AsmToken::Identifier))
return Error(L, "unexpected token in .syntax directive");
@@ -2345,9 +4570,9 @@ bool ARMAsmParser::ParseDirectiveSyntax(SMLoc L) {
return false;
}
-/// ParseDirectiveCode
+/// parseDirectiveCode
/// ::= .code 16 | 32
-bool ARMAsmParser::ParseDirectiveCode(SMLoc L) {
+bool ARMAsmParser::parseDirectiveCode(SMLoc L) {
const AsmToken &Tok = Parser.getTok();
if (Tok.isNot(AsmToken::Integer))
return Error(L, "unexpected token in .code directive");
@@ -2380,8 +4605,8 @@ extern "C" void LLVMInitializeARMAsmLexer();
/// Force static initialization.
extern "C" void LLVMInitializeARMAsmParser() {
- RegisterAsmParser<ARMAsmParser> X(TheARMTarget);
- RegisterAsmParser<ARMAsmParser> Y(TheThumbTarget);
+ RegisterMCAsmParser<ARMAsmParser> X(TheARMTarget);
+ RegisterMCAsmParser<ARMAsmParser> Y(TheThumbTarget);
LLVMInitializeARMAsmLexer();
}
diff --git a/contrib/llvm/lib/Target/ARM/Disassembler/ARMDisassembler.cpp b/contrib/llvm/lib/Target/ARM/Disassembler/ARMDisassembler.cpp
index bdce2c4..8f2f813 100644
--- a/contrib/llvm/lib/Target/ARM/Disassembler/ARMDisassembler.cpp
+++ b/contrib/llvm/lib/Target/ARM/Disassembler/ARMDisassembler.cpp
@@ -6,584 +6,4077 @@
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
-//
-// This file is part of the ARM Disassembler.
-// It contains code to implement the public interfaces of ARMDisassembler and
-// ThumbDisassembler, both of which are instances of MCDisassembler.
-//
-//===----------------------------------------------------------------------===//
#define DEBUG_TYPE "arm-disassembler"
-#include "ARMDisassembler.h"
-#include "ARMDisassemblerCore.h"
-
-#include "llvm/ADT/OwningPtr.h"
+#include "ARM.h"
+#include "ARMRegisterInfo.h"
+#include "ARMSubtarget.h"
+#include "MCTargetDesc/ARMAddressingModes.h"
+#include "MCTargetDesc/ARMMCExpr.h"
+#include "MCTargetDesc/ARMBaseInfo.h"
#include "llvm/MC/EDInstInfo.h"
#include "llvm/MC/MCInst.h"
-#include "llvm/Target/TargetRegistry.h"
+#include "llvm/MC/MCExpr.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCDisassembler.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/MemoryObject.h"
#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/TargetRegistry.h"
#include "llvm/Support/raw_ostream.h"
-//#define DEBUG(X) do { X; } while (0)
-
-/// ARMGenDecoderTables.inc - ARMDecoderTables.inc is tblgen'ed from
-/// ARMDecoderEmitter.cpp TableGen backend. It contains:
-///
-/// o Mappings from opcode to ARM/Thumb instruction format
-///
-/// o static uint16_t decodeInstruction(uint32_t insn) - the decoding function
-/// for an ARM instruction.
-///
-/// o static uint16_t decodeThumbInstruction(field_t insn) - the decoding
-/// function for a Thumb instruction.
-///
-#include "ARMGenDecoderTables.inc"
+using namespace llvm;
+
+typedef MCDisassembler::DecodeStatus DecodeStatus;
+namespace {
+/// ARMDisassembler - ARM disassembler for all ARM platforms.
+class ARMDisassembler : public MCDisassembler {
+public:
+ /// Constructor - Initializes the disassembler.
+ ///
+ ARMDisassembler(const MCSubtargetInfo &STI) :
+ MCDisassembler(STI) {
+ }
+
+ ~ARMDisassembler() {
+ }
+
+ /// getInstruction - See MCDisassembler.
+ DecodeStatus getInstruction(MCInst &instr,
+ uint64_t &size,
+ const MemoryObject &region,
+ uint64_t address,
+ raw_ostream &vStream,
+ raw_ostream &cStream) const;
+
+ /// getEDInfo - See MCDisassembler.
+ EDInstInfo *getEDInfo() const;
+private:
+};
+
+/// ThumbDisassembler - Thumb disassembler for all Thumb platforms.
+class ThumbDisassembler : public MCDisassembler {
+public:
+ /// Constructor - Initializes the disassembler.
+ ///
+ ThumbDisassembler(const MCSubtargetInfo &STI) :
+ MCDisassembler(STI) {
+ }
+
+ ~ThumbDisassembler() {
+ }
+
+ /// getInstruction - See MCDisassembler.
+ DecodeStatus getInstruction(MCInst &instr,
+ uint64_t &size,
+ const MemoryObject &region,
+ uint64_t address,
+ raw_ostream &vStream,
+ raw_ostream &cStream) const;
+
+ /// getEDInfo - See MCDisassembler.
+ EDInstInfo *getEDInfo() const;
+private:
+ mutable std::vector<unsigned> ITBlock;
+ DecodeStatus AddThumbPredicate(MCInst&) const;
+ void UpdateThumbVFPPredicate(MCInst&) const;
+};
+}
+
+static bool Check(DecodeStatus &Out, DecodeStatus In) {
+ switch (In) {
+ case MCDisassembler::Success:
+ // Out stays the same.
+ return true;
+ case MCDisassembler::SoftFail:
+ Out = In;
+ return true;
+ case MCDisassembler::Fail:
+ Out = In;
+ return false;
+ }
+ return false;
+}
+
+
+// Forward declare these because the autogenerated code will reference them.
+// Definitions are further down.
+static DecodeStatus DecodeGPRRegisterClass(llvm::MCInst &Inst, unsigned RegNo,
+ uint64_t Address, const void *Decoder);
+static DecodeStatus DecodeGPRnopcRegisterClass(llvm::MCInst &Inst,
+ unsigned RegNo, uint64_t Address,
+ const void *Decoder);
+static DecodeStatus DecodetGPRRegisterClass(llvm::MCInst &Inst, unsigned RegNo,
+ uint64_t Address, const void *Decoder);
+static DecodeStatus DecodetcGPRRegisterClass(llvm::MCInst &Inst, unsigned RegNo,
+ uint64_t Address, const void *Decoder);
+static DecodeStatus DecoderGPRRegisterClass(llvm::MCInst &Inst, unsigned RegNo,
+ uint64_t Address, const void *Decoder);
+static DecodeStatus DecodeSPRRegisterClass(llvm::MCInst &Inst, unsigned RegNo,
+ uint64_t Address, const void *Decoder);
+static DecodeStatus DecodeDPRRegisterClass(llvm::MCInst &Inst, unsigned RegNo,
+ uint64_t Address, const void *Decoder);
+static DecodeStatus DecodeDPR_8RegisterClass(llvm::MCInst &Inst, unsigned RegNo,
+ uint64_t Address, const void *Decoder);
+static DecodeStatus DecodeDPR_VFP2RegisterClass(llvm::MCInst &Inst,
+ unsigned RegNo,
+ uint64_t Address,
+ const void *Decoder);
+static DecodeStatus DecodeQPRRegisterClass(llvm::MCInst &Inst, unsigned RegNo,
+ uint64_t Address, const void *Decoder);
+
+static DecodeStatus DecodePredicateOperand(llvm::MCInst &Inst, unsigned Val,
+ uint64_t Address, const void *Decoder);
+static DecodeStatus DecodeCCOutOperand(llvm::MCInst &Inst, unsigned Val,
+ uint64_t Address, const void *Decoder);
+static DecodeStatus DecodeSOImmOperand(llvm::MCInst &Inst, unsigned Val,
+ uint64_t Address, const void *Decoder);
+static DecodeStatus DecodeRegListOperand(llvm::MCInst &Inst, unsigned Val,
+ uint64_t Address, const void *Decoder);
+static DecodeStatus DecodeSPRRegListOperand(llvm::MCInst &Inst, unsigned Val,
+ uint64_t Address, const void *Decoder);
+static DecodeStatus DecodeDPRRegListOperand(llvm::MCInst &Inst, unsigned Val,
+ uint64_t Address, const void *Decoder);
+
+static DecodeStatus DecodeBitfieldMaskOperand(llvm::MCInst &Inst, unsigned Insn,
+ uint64_t Address, const void *Decoder);
+static DecodeStatus DecodeCopMemInstruction(llvm::MCInst &Inst, unsigned Insn,
+ uint64_t Address, const void *Decoder);
+static DecodeStatus DecodeAddrMode2IdxInstruction(llvm::MCInst &Inst,
+ unsigned Insn,
+ uint64_t Address,
+ const void *Decoder);
+static DecodeStatus DecodeSORegMemOperand(llvm::MCInst &Inst, unsigned Insn,
+ uint64_t Address, const void *Decoder);
+static DecodeStatus DecodeAddrMode3Instruction(llvm::MCInst &Inst,unsigned Insn,
+ uint64_t Address, const void *Decoder);
+static DecodeStatus DecodeSORegImmOperand(llvm::MCInst &Inst, unsigned Insn,
+ uint64_t Address, const void *Decoder);
+static DecodeStatus DecodeSORegRegOperand(llvm::MCInst &Inst, unsigned Insn,
+ uint64_t Address, const void *Decoder);
+
+static DecodeStatus DecodeMemMultipleWritebackInstruction(llvm::MCInst & Inst,
+ unsigned Insn,
+ uint64_t Adddress,
+ const void *Decoder);
+static DecodeStatus DecodeT2MOVTWInstruction(llvm::MCInst &Inst, unsigned Insn,
+ uint64_t Address, const void *Decoder);
+static DecodeStatus DecodeArmMOVTWInstruction(llvm::MCInst &Inst, unsigned Insn,
+ uint64_t Address, const void *Decoder);
+static DecodeStatus DecodeSMLAInstruction(llvm::MCInst &Inst, unsigned Insn,
+ uint64_t Address, const void *Decoder);
+static DecodeStatus DecodeCPSInstruction(llvm::MCInst &Inst, unsigned Insn,
+ uint64_t Address, const void *Decoder);
+static DecodeStatus DecodeT2CPSInstruction(llvm::MCInst &Inst, unsigned Insn,
+ uint64_t Address, const void *Decoder);
+static DecodeStatus DecodeAddrModeImm12Operand(llvm::MCInst &Inst, unsigned Val,
+ uint64_t Address, const void *Decoder);
+static DecodeStatus DecodeAddrMode5Operand(llvm::MCInst &Inst, unsigned Val,
+ uint64_t Address, const void *Decoder);
+static DecodeStatus DecodeAddrMode7Operand(llvm::MCInst &Inst, unsigned Val,
+ uint64_t Address, const void *Decoder);
+static DecodeStatus DecodeBranchImmInstruction(llvm::MCInst &Inst,unsigned Insn,
+ uint64_t Address, const void *Decoder);
+static DecodeStatus DecodeVCVTImmOperand(llvm::MCInst &Inst, unsigned Val,
+ uint64_t Address, const void *Decoder);
+static DecodeStatus DecodeAddrMode6Operand(llvm::MCInst &Inst, unsigned Val,
+ uint64_t Address, const void *Decoder);
+static DecodeStatus DecodeVLDInstruction(llvm::MCInst &Inst, unsigned Val,
+ uint64_t Address, const void *Decoder);
+static DecodeStatus DecodeVSTInstruction(llvm::MCInst &Inst, unsigned Val,
+ uint64_t Address, const void *Decoder);
+static DecodeStatus DecodeVLD1DupInstruction(llvm::MCInst &Inst, unsigned Val,
+ uint64_t Address, const void *Decoder);
+static DecodeStatus DecodeVLD2DupInstruction(llvm::MCInst &Inst, unsigned Val,
+ uint64_t Address, const void *Decoder);
+static DecodeStatus DecodeVLD3DupInstruction(llvm::MCInst &Inst, unsigned Val,
+ uint64_t Address, const void *Decoder);
+static DecodeStatus DecodeVLD4DupInstruction(llvm::MCInst &Inst, unsigned Val,
+ uint64_t Address, const void *Decoder);
+static DecodeStatus DecodeNEONModImmInstruction(llvm::MCInst &Inst,unsigned Val,
+ uint64_t Address, const void *Decoder);
+static DecodeStatus DecodeVSHLMaxInstruction(llvm::MCInst &Inst, unsigned Val,
+ uint64_t Address, const void *Decoder);
+static DecodeStatus DecodeShiftRight8Imm(llvm::MCInst &Inst, unsigned Val,
+ uint64_t Address, const void *Decoder);
+static DecodeStatus DecodeShiftRight16Imm(llvm::MCInst &Inst, unsigned Val,
+ uint64_t Address, const void *Decoder);
+static DecodeStatus DecodeShiftRight32Imm(llvm::MCInst &Inst, unsigned Val,
+ uint64_t Address, const void *Decoder);
+static DecodeStatus DecodeShiftRight64Imm(llvm::MCInst &Inst, unsigned Val,
+ uint64_t Address, const void *Decoder);
+static DecodeStatus DecodeTBLInstruction(llvm::MCInst &Inst, unsigned Insn,
+ uint64_t Address, const void *Decoder);
+static DecodeStatus DecodePostIdxReg(llvm::MCInst &Inst, unsigned Insn,
+ uint64_t Address, const void *Decoder);
+static DecodeStatus DecodeCoprocessor(llvm::MCInst &Inst, unsigned Insn,
+ uint64_t Address, const void *Decoder);
+static DecodeStatus DecodeMemBarrierOption(llvm::MCInst &Inst, unsigned Insn,
+ uint64_t Address, const void *Decoder);
+static DecodeStatus DecodeMSRMask(llvm::MCInst &Inst, unsigned Insn,
+ uint64_t Address, const void *Decoder);
+static DecodeStatus DecodeDoubleRegLoad(llvm::MCInst &Inst, unsigned Insn,
+ uint64_t Address, const void *Decoder);
+static DecodeStatus DecodeDoubleRegStore(llvm::MCInst &Inst, unsigned Insn,
+ uint64_t Address, const void *Decoder);
+static DecodeStatus DecodeLDRPreImm(llvm::MCInst &Inst, unsigned Insn,
+ uint64_t Address, const void *Decoder);
+static DecodeStatus DecodeLDRPreReg(llvm::MCInst &Inst, unsigned Insn,
+ uint64_t Address, const void *Decoder);
+static DecodeStatus DecodeSTRPreImm(llvm::MCInst &Inst, unsigned Insn,
+ uint64_t Address, const void *Decoder);
+static DecodeStatus DecodeSTRPreReg(llvm::MCInst &Inst, unsigned Insn,
+ uint64_t Address, const void *Decoder);
+static DecodeStatus DecodeVLD1LN(llvm::MCInst &Inst, unsigned Insn,
+ uint64_t Address, const void *Decoder);
+static DecodeStatus DecodeVLD2LN(llvm::MCInst &Inst, unsigned Insn,
+ uint64_t Address, const void *Decoder);
+static DecodeStatus DecodeVLD3LN(llvm::MCInst &Inst, unsigned Insn,
+ uint64_t Address, const void *Decoder);
+static DecodeStatus DecodeVLD4LN(llvm::MCInst &Inst, unsigned Insn,
+ uint64_t Address, const void *Decoder);
+static DecodeStatus DecodeVST1LN(llvm::MCInst &Inst, unsigned Insn,
+ uint64_t Address, const void *Decoder);
+static DecodeStatus DecodeVST2LN(llvm::MCInst &Inst, unsigned Insn,
+ uint64_t Address, const void *Decoder);
+static DecodeStatus DecodeVST3LN(llvm::MCInst &Inst, unsigned Insn,
+ uint64_t Address, const void *Decoder);
+static DecodeStatus DecodeVST4LN(llvm::MCInst &Inst, unsigned Insn,
+ uint64_t Address, const void *Decoder);
+static DecodeStatus DecodeVMOVSRR(llvm::MCInst &Inst, unsigned Insn,
+ uint64_t Address, const void *Decoder);
+static DecodeStatus DecodeVMOVRRS(llvm::MCInst &Inst, unsigned Insn,
+ uint64_t Address, const void *Decoder);
+
+static DecodeStatus DecodeThumbAddSpecialReg(llvm::MCInst &Inst, uint16_t Insn,
+ uint64_t Address, const void *Decoder);
+static DecodeStatus DecodeThumbBROperand(llvm::MCInst &Inst, unsigned Val,
+ uint64_t Address, const void *Decoder);
+static DecodeStatus DecodeT2BROperand(llvm::MCInst &Inst, unsigned Val,
+ uint64_t Address, const void *Decoder);
+static DecodeStatus DecodeThumbCmpBROperand(llvm::MCInst &Inst, unsigned Val,
+ uint64_t Address, const void *Decoder);
+static DecodeStatus DecodeThumbAddrModeRR(llvm::MCInst &Inst, unsigned Val,
+ uint64_t Address, const void *Decoder);
+static DecodeStatus DecodeThumbAddrModeIS(llvm::MCInst &Inst, unsigned Val,
+ uint64_t Address, const void *Decoder);
+static DecodeStatus DecodeThumbAddrModePC(llvm::MCInst &Inst, unsigned Val,
+ uint64_t Address, const void *Decoder);
+static DecodeStatus DecodeThumbAddrModeSP(llvm::MCInst &Inst, unsigned Val,
+ uint64_t Address, const void *Decoder);
+static DecodeStatus DecodeT2AddrModeSOReg(llvm::MCInst &Inst, unsigned Val,
+ uint64_t Address, const void *Decoder);
+static DecodeStatus DecodeT2LoadShift(llvm::MCInst &Inst, unsigned Val,
+ uint64_t Address, const void *Decoder);
+static DecodeStatus DecodeT2Imm8S4(llvm::MCInst &Inst, unsigned Val,
+ uint64_t Address, const void *Decoder);
+static DecodeStatus DecodeT2AddrModeImm8s4(llvm::MCInst &Inst, unsigned Val,
+ uint64_t Address, const void *Decoder);
+static DecodeStatus DecodeT2AddrModeImm0_1020s4(llvm::MCInst &Inst,unsigned Val,
+ uint64_t Address, const void *Decoder);
+static DecodeStatus DecodeT2Imm8(llvm::MCInst &Inst, unsigned Val,
+ uint64_t Address, const void *Decoder);
+static DecodeStatus DecodeT2AddrModeImm8(llvm::MCInst &Inst, unsigned Val,
+ uint64_t Address, const void *Decoder);
+static DecodeStatus DecodeThumbAddSPImm(llvm::MCInst &Inst, uint16_t Val,
+ uint64_t Address, const void *Decoder);
+static DecodeStatus DecodeThumbAddSPReg(llvm::MCInst &Inst, uint16_t Insn,
+ uint64_t Address, const void *Decoder);
+static DecodeStatus DecodeThumbCPS(llvm::MCInst &Inst, uint16_t Insn,
+ uint64_t Address, const void *Decoder);
+static DecodeStatus DecodeThumbBLXOffset(llvm::MCInst &Inst, unsigned Insn,
+ uint64_t Address, const void *Decoder);
+static DecodeStatus DecodeT2AddrModeImm12(llvm::MCInst &Inst, unsigned Val,
+ uint64_t Address, const void *Decoder);
+static DecodeStatus DecodeThumbTableBranch(llvm::MCInst &Inst, unsigned Val,
+ uint64_t Address, const void *Decoder);
+static DecodeStatus DecodeThumb2BCCInstruction(llvm::MCInst &Inst, unsigned Val,
+ uint64_t Address, const void *Decoder);
+static DecodeStatus DecodeT2SOImm(llvm::MCInst &Inst, unsigned Val,
+ uint64_t Address, const void *Decoder);
+static DecodeStatus DecodeThumbBCCTargetOperand(llvm::MCInst &Inst,unsigned Val,
+ uint64_t Address, const void *Decoder);
+static DecodeStatus DecodeThumbBLTargetOperand(llvm::MCInst &Inst, unsigned Val,
+ uint64_t Address, const void *Decoder);
+static DecodeStatus DecodeIT(llvm::MCInst &Inst, unsigned Val,
+ uint64_t Address, const void *Decoder);
+static DecodeStatus DecodeT2LDRDPreInstruction(llvm::MCInst &Inst,unsigned Insn,
+ uint64_t Address, const void *Decoder);
+static DecodeStatus DecodeT2STRDPreInstruction(llvm::MCInst &Inst,unsigned Insn,
+ uint64_t Address, const void *Decoder);
+static DecodeStatus DecodeT2Adr(llvm::MCInst &Inst, unsigned Val,
+ uint64_t Address, const void *Decoder);
+static DecodeStatus DecodeT2LdStPre(llvm::MCInst &Inst, unsigned Val,
+ uint64_t Address, const void *Decoder);
+static DecodeStatus DecodeT2ShifterImmOperand(llvm::MCInst &Inst, unsigned Val,
+ uint64_t Address, const void *Decoder);
+
+
+
+#include "ARMGenDisassemblerTables.inc"
+#include "ARMGenInstrInfo.inc"
#include "ARMGenEDInfo.inc"
-using namespace llvm;
+static MCDisassembler *createARMDisassembler(const Target &T, const MCSubtargetInfo &STI) {
+ return new ARMDisassembler(STI);
+}
+
+static MCDisassembler *createThumbDisassembler(const Target &T, const MCSubtargetInfo &STI) {
+ return new ThumbDisassembler(STI);
+}
-/// showBitVector - Use the raw_ostream to log a diagnostic message describing
-/// the inidividual bits of the instruction.
-///
-static inline void showBitVector(raw_ostream &os, const uint32_t &insn) {
- // Split the bit position markers into more than one lines to fit 80 columns.
- os << " 31 30 29 28 27 26 25 24 23 22 21 20 19 18 17 16 15 14 13 12 11"
- << " 10 9 8 7 6 5 4 3 2 1 0 \n";
- os << "---------------------------------------------------------------"
- << "----------------------------------\n";
- os << '|';
- for (unsigned i = 32; i != 0; --i) {
- if (insn >> (i - 1) & 0x01)
- os << " 1";
+EDInstInfo *ARMDisassembler::getEDInfo() const {
+ return instInfoARM;
+}
+
+EDInstInfo *ThumbDisassembler::getEDInfo() const {
+ return instInfoARM;
+}
+
+DecodeStatus ARMDisassembler::getInstruction(MCInst &MI, uint64_t &Size,
+ const MemoryObject &Region,
+ uint64_t Address,
+ raw_ostream &os,
+ raw_ostream &cs) const {
+ CommentStream = &cs;
+
+ uint8_t bytes[4];
+
+ assert(!(STI.getFeatureBits() & ARM::ModeThumb) &&
+ "Asked to disassemble an ARM instruction but Subtarget is in Thumb mode!");
+
+ // We want to read exactly 4 bytes of data.
+ if (Region.readBytes(Address, 4, (uint8_t*)bytes, NULL) == -1) {
+ Size = 0;
+ return MCDisassembler::Fail;
+ }
+
+ // Encoded as a small-endian 32-bit word in the stream.
+ uint32_t insn = (bytes[3] << 24) |
+ (bytes[2] << 16) |
+ (bytes[1] << 8) |
+ (bytes[0] << 0);
+
+ // Calling the auto-generated decoder function.
+ DecodeStatus result = decodeARMInstruction32(MI, insn, Address, this, STI);
+ if (result != MCDisassembler::Fail) {
+ Size = 4;
+ return result;
+ }
+
+ // VFP and NEON instructions, similarly, are shared between ARM
+ // and Thumb modes.
+ MI.clear();
+ result = decodeVFPInstruction32(MI, insn, Address, this, STI);
+ if (result != MCDisassembler::Fail) {
+ Size = 4;
+ return result;
+ }
+
+ MI.clear();
+ result = decodeNEONDataInstruction32(MI, insn, Address, this, STI);
+ if (result != MCDisassembler::Fail) {
+ Size = 4;
+ // Add a fake predicate operand, because we share these instruction
+ // definitions with Thumb2 where these instructions are predicable.
+ if (!DecodePredicateOperand(MI, 0xE, Address, this))
+ return MCDisassembler::Fail;
+ return result;
+ }
+
+ MI.clear();
+ result = decodeNEONLoadStoreInstruction32(MI, insn, Address, this, STI);
+ if (result != MCDisassembler::Fail) {
+ Size = 4;
+ // Add a fake predicate operand, because we share these instruction
+ // definitions with Thumb2 where these instructions are predicable.
+ if (!DecodePredicateOperand(MI, 0xE, Address, this))
+ return MCDisassembler::Fail;
+ return result;
+ }
+
+ MI.clear();
+ result = decodeNEONDupInstruction32(MI, insn, Address, this, STI);
+ if (result != MCDisassembler::Fail) {
+ Size = 4;
+ // Add a fake predicate operand, because we share these instruction
+ // definitions with Thumb2 where these instructions are predicable.
+ if (!DecodePredicateOperand(MI, 0xE, Address, this))
+ return MCDisassembler::Fail;
+ return result;
+ }
+
+ MI.clear();
+
+ Size = 0;
+ return MCDisassembler::Fail;
+}
+
+namespace llvm {
+extern MCInstrDesc ARMInsts[];
+}
+
+/// tryAddingSymbolicOperand - trys to add a symbolic operand in place of the
+/// immediate Value in the MCInst. The immediate Value has had any PC
+/// adjustment made by the caller. If the instruction is a branch instruction
+/// then isBranch is true, else false. If the getOpInfo() function was set as
+/// part of the setupForSymbolicDisassembly() call then that function is called
+/// to get any symbolic information at the Address for this instruction. If
+/// that returns non-zero then the symbolic information it returns is used to
+/// create an MCExpr and that is added as an operand to the MCInst. If
+/// getOpInfo() returns zero and isBranch is true then a symbol look up for
+/// Value is done and if a symbol is found an MCExpr is created with that, else
+/// an MCExpr with Value is created. This function returns true if it adds an
+/// operand to the MCInst and false otherwise.
+static bool tryAddingSymbolicOperand(uint64_t Address, int32_t Value,
+ bool isBranch, uint64_t InstSize,
+ MCInst &MI, const void *Decoder) {
+ const MCDisassembler *Dis = static_cast<const MCDisassembler*>(Decoder);
+ LLVMOpInfoCallback getOpInfo = Dis->getLLVMOpInfoCallback();
+ if (!getOpInfo)
+ return false;
+
+ struct LLVMOpInfo1 SymbolicOp;
+ SymbolicOp.Value = Value;
+ void *DisInfo = Dis->getDisInfoBlock();
+ if (!getOpInfo(DisInfo, Address, 0 /* Offset */, InstSize, 1, &SymbolicOp)) {
+ if (isBranch) {
+ LLVMSymbolLookupCallback SymbolLookUp =
+ Dis->getLLVMSymbolLookupCallback();
+ if (SymbolLookUp) {
+ uint64_t ReferenceType;
+ ReferenceType = LLVMDisassembler_ReferenceType_In_Branch;
+ const char *ReferenceName;
+ const char *Name = SymbolLookUp(DisInfo, Value, &ReferenceType, Address,
+ &ReferenceName);
+ if (Name) {
+ SymbolicOp.AddSymbol.Name = Name;
+ SymbolicOp.AddSymbol.Present = true;
+ SymbolicOp.Value = 0;
+ }
+ else {
+ SymbolicOp.Value = Value;
+ }
+ if(ReferenceType == LLVMDisassembler_ReferenceType_Out_SymbolStub)
+ (*Dis->CommentStream) << "symbol stub for: " << ReferenceName;
+ }
+ else {
+ return false;
+ }
+ }
+ else {
+ return false;
+ }
+ }
+
+ MCContext *Ctx = Dis->getMCContext();
+ const MCExpr *Add = NULL;
+ if (SymbolicOp.AddSymbol.Present) {
+ if (SymbolicOp.AddSymbol.Name) {
+ StringRef Name(SymbolicOp.AddSymbol.Name);
+ MCSymbol *Sym = Ctx->GetOrCreateSymbol(Name);
+ Add = MCSymbolRefExpr::Create(Sym, *Ctx);
+ } else {
+ Add = MCConstantExpr::Create(SymbolicOp.AddSymbol.Value, *Ctx);
+ }
+ }
+
+ const MCExpr *Sub = NULL;
+ if (SymbolicOp.SubtractSymbol.Present) {
+ if (SymbolicOp.SubtractSymbol.Name) {
+ StringRef Name(SymbolicOp.SubtractSymbol.Name);
+ MCSymbol *Sym = Ctx->GetOrCreateSymbol(Name);
+ Sub = MCSymbolRefExpr::Create(Sym, *Ctx);
+ } else {
+ Sub = MCConstantExpr::Create(SymbolicOp.SubtractSymbol.Value, *Ctx);
+ }
+ }
+
+ const MCExpr *Off = NULL;
+ if (SymbolicOp.Value != 0)
+ Off = MCConstantExpr::Create(SymbolicOp.Value, *Ctx);
+
+ const MCExpr *Expr;
+ if (Sub) {
+ const MCExpr *LHS;
+ if (Add)
+ LHS = MCBinaryExpr::CreateSub(Add, Sub, *Ctx);
else
- os << " 0";
- os << (i%4 == 1 ? '|' : ':');
- }
- os << '\n';
- // Split the bit position markers into more than one lines to fit 80 columns.
- os << "---------------------------------------------------------------"
- << "----------------------------------\n";
- os << '\n';
-}
-
-/// decodeARMInstruction is a decorator function which tries special cases of
-/// instruction matching before calling the auto-generated decoder function.
-static unsigned decodeARMInstruction(uint32_t &insn) {
- if (slice(insn, 31, 28) == 15)
- goto AutoGenedDecoder;
-
- // Special case processing, if any, goes here....
-
- // LLVM combines the offset mode of A8.6.197 & A8.6.198 into STRB.
- // The insufficient encoding information of the combined instruction confuses
- // the decoder wrt BFC/BFI. Therefore, we try to recover here.
- // For BFC, Inst{27-21} = 0b0111110 & Inst{6-0} = 0b0011111.
- // For BFI, Inst{27-21} = 0b0111110 & Inst{6-4} = 0b001 & Inst{3-0} =! 0b1111.
- if (slice(insn, 27, 21) == 0x3e && slice(insn, 6, 4) == 1) {
- if (slice(insn, 3, 0) == 15)
- return ARM::BFC;
+ LHS = MCUnaryExpr::CreateMinus(Sub, *Ctx);
+ if (Off != 0)
+ Expr = MCBinaryExpr::CreateAdd(LHS, Off, *Ctx);
else
- return ARM::BFI;
- }
-
- // Ditto for STRBT, which is a super-instruction for A8.6.199 Encodings
- // A1 & A2.
- // As a result, the decoder fails to deocode USAT properly.
- if (slice(insn, 27, 21) == 0x37 && slice(insn, 5, 4) == 1)
- return ARM::USAT;
- // As a result, the decoder fails to deocode UQADD16 properly.
- if (slice(insn, 27, 20) == 0x66 && slice(insn, 7, 4) == 1)
- return ARM::UQADD16;
-
- // Ditto for ADDSrs, which is a super-instruction for A8.6.7 & A8.6.8.
- // As a result, the decoder fails to decode UMULL properly.
- if (slice(insn, 27, 21) == 0x04 && slice(insn, 7, 4) == 9) {
- return ARM::UMULL;
- }
-
- // Ditto for STR_PRE, which is a super-instruction for A8.6.194 & A8.6.195.
- // As a result, the decoder fails to decode SBFX properly.
- if (slice(insn, 27, 21) == 0x3d && slice(insn, 6, 4) == 5)
- return ARM::SBFX;
-
- // And STRB_PRE, which is a super-instruction for A8.6.197 & A8.6.198.
- // As a result, the decoder fails to decode UBFX properly.
- if (slice(insn, 27, 21) == 0x3f && slice(insn, 6, 4) == 5)
- return ARM::UBFX;
-
- // Ditto for STRT, which is a super-instruction for A8.6.210 Encoding A1 & A2.
- // As a result, the decoder fails to deocode SSAT properly.
- if (slice(insn, 27, 21) == 0x35 && slice(insn, 5, 4) == 1)
- return ARM::SSAT;
-
- // Ditto for RSCrs, which is a super-instruction for A8.6.146 & A8.6.147.
- // As a result, the decoder fails to decode STRHT/LDRHT/LDRSHT/LDRSBT.
- if (slice(insn, 27, 24) == 0) {
- switch (slice(insn, 21, 20)) {
- case 2:
- switch (slice(insn, 7, 4)) {
- case 11:
- return ARM::STRHT;
- default:
- break; // fallthrough
+ Expr = LHS;
+ } else if (Add) {
+ if (Off != 0)
+ Expr = MCBinaryExpr::CreateAdd(Add, Off, *Ctx);
+ else
+ Expr = Add;
+ } else {
+ if (Off != 0)
+ Expr = Off;
+ else
+ Expr = MCConstantExpr::Create(0, *Ctx);
+ }
+
+ if (SymbolicOp.VariantKind == LLVMDisassembler_VariantKind_ARM_HI16)
+ MI.addOperand(MCOperand::CreateExpr(ARMMCExpr::CreateUpper16(Expr, *Ctx)));
+ else if (SymbolicOp.VariantKind == LLVMDisassembler_VariantKind_ARM_LO16)
+ MI.addOperand(MCOperand::CreateExpr(ARMMCExpr::CreateLower16(Expr, *Ctx)));
+ else if (SymbolicOp.VariantKind == LLVMDisassembler_VariantKind_None)
+ MI.addOperand(MCOperand::CreateExpr(Expr));
+ else
+ assert(0 && "bad SymbolicOp.VariantKind");
+
+ return true;
+}
+
+/// tryAddingPcLoadReferenceComment - trys to add a comment as to what is being
+/// referenced by a load instruction with the base register that is the Pc.
+/// These can often be values in a literal pool near the Address of the
+/// instruction. The Address of the instruction and its immediate Value are
+/// used as a possible literal pool entry. The SymbolLookUp call back will
+/// return the name of a symbol referenced by the the literal pool's entry if
+/// the referenced address is that of a symbol. Or it will return a pointer to
+/// a literal 'C' string if the referenced address of the literal pool's entry
+/// is an address into a section with 'C' string literals.
+static void tryAddingPcLoadReferenceComment(uint64_t Address, int Value,
+ const void *Decoder) {
+ const MCDisassembler *Dis = static_cast<const MCDisassembler*>(Decoder);
+ LLVMSymbolLookupCallback SymbolLookUp = Dis->getLLVMSymbolLookupCallback();
+ if (SymbolLookUp) {
+ void *DisInfo = Dis->getDisInfoBlock();
+ uint64_t ReferenceType;
+ ReferenceType = LLVMDisassembler_ReferenceType_In_PCrel_Load;
+ const char *ReferenceName;
+ (void)SymbolLookUp(DisInfo, Value, &ReferenceType, Address, &ReferenceName);
+ if(ReferenceType == LLVMDisassembler_ReferenceType_Out_LitPool_SymAddr ||
+ ReferenceType == LLVMDisassembler_ReferenceType_Out_LitPool_CstrAddr)
+ (*Dis->CommentStream) << "literal pool for: " << ReferenceName;
+ }
+}
+
+// Thumb1 instructions don't have explicit S bits. Rather, they
+// implicitly set CPSR. Since it's not represented in the encoding, the
+// auto-generated decoder won't inject the CPSR operand. We need to fix
+// that as a post-pass.
+static void AddThumb1SBit(MCInst &MI, bool InITBlock) {
+ const MCOperandInfo *OpInfo = ARMInsts[MI.getOpcode()].OpInfo;
+ unsigned short NumOps = ARMInsts[MI.getOpcode()].NumOperands;
+ MCInst::iterator I = MI.begin();
+ for (unsigned i = 0; i < NumOps; ++i, ++I) {
+ if (I == MI.end()) break;
+ if (OpInfo[i].isOptionalDef() && OpInfo[i].RegClass == ARM::CCRRegClassID) {
+ if (i > 0 && OpInfo[i-1].isPredicate()) continue;
+ MI.insert(I, MCOperand::CreateReg(InITBlock ? 0 : ARM::CPSR));
+ return;
+ }
+ }
+
+ MI.insert(I, MCOperand::CreateReg(InITBlock ? 0 : ARM::CPSR));
+}
+
+// Most Thumb instructions don't have explicit predicates in the
+// encoding, but rather get their predicates from IT context. We need
+// to fix up the predicate operands using this context information as a
+// post-pass.
+MCDisassembler::DecodeStatus
+ThumbDisassembler::AddThumbPredicate(MCInst &MI) const {
+ MCDisassembler::DecodeStatus S = Success;
+
+ // A few instructions actually have predicates encoded in them. Don't
+ // try to overwrite it if we're seeing one of those.
+ switch (MI.getOpcode()) {
+ case ARM::tBcc:
+ case ARM::t2Bcc:
+ case ARM::tCBZ:
+ case ARM::tCBNZ:
+ case ARM::tCPS:
+ case ARM::t2CPS3p:
+ case ARM::t2CPS2p:
+ case ARM::t2CPS1p:
+ case ARM::tMOVSr:
+ case ARM::tSETEND:
+ // Some instructions (mostly conditional branches) are not
+ // allowed in IT blocks.
+ if (!ITBlock.empty())
+ S = SoftFail;
+ else
+ return Success;
+ break;
+ case ARM::tB:
+ case ARM::t2B:
+ case ARM::t2TBB:
+ case ARM::t2TBH:
+ // Some instructions (mostly unconditional branches) can
+ // only appears at the end of, or outside of, an IT.
+ if (ITBlock.size() > 1)
+ S = SoftFail;
+ break;
+ default:
+ break;
+ }
+
+ // If we're in an IT block, base the predicate on that. Otherwise,
+ // assume a predicate of AL.
+ unsigned CC;
+ if (!ITBlock.empty()) {
+ CC = ITBlock.back();
+ if (CC == 0xF)
+ CC = ARMCC::AL;
+ ITBlock.pop_back();
+ } else
+ CC = ARMCC::AL;
+
+ const MCOperandInfo *OpInfo = ARMInsts[MI.getOpcode()].OpInfo;
+ unsigned short NumOps = ARMInsts[MI.getOpcode()].NumOperands;
+ MCInst::iterator I = MI.begin();
+ for (unsigned i = 0; i < NumOps; ++i, ++I) {
+ if (I == MI.end()) break;
+ if (OpInfo[i].isPredicate()) {
+ I = MI.insert(I, MCOperand::CreateImm(CC));
+ ++I;
+ if (CC == ARMCC::AL)
+ MI.insert(I, MCOperand::CreateReg(0));
+ else
+ MI.insert(I, MCOperand::CreateReg(ARM::CPSR));
+ return S;
+ }
+ }
+
+ I = MI.insert(I, MCOperand::CreateImm(CC));
+ ++I;
+ if (CC == ARMCC::AL)
+ MI.insert(I, MCOperand::CreateReg(0));
+ else
+ MI.insert(I, MCOperand::CreateReg(ARM::CPSR));
+
+ return S;
+}
+
+// Thumb VFP instructions are a special case. Because we share their
+// encodings between ARM and Thumb modes, and they are predicable in ARM
+// mode, the auto-generated decoder will give them an (incorrect)
+// predicate operand. We need to rewrite these operands based on the IT
+// context as a post-pass.
+void ThumbDisassembler::UpdateThumbVFPPredicate(MCInst &MI) const {
+ unsigned CC;
+ if (!ITBlock.empty()) {
+ CC = ITBlock.back();
+ ITBlock.pop_back();
+ } else
+ CC = ARMCC::AL;
+
+ const MCOperandInfo *OpInfo = ARMInsts[MI.getOpcode()].OpInfo;
+ MCInst::iterator I = MI.begin();
+ unsigned short NumOps = ARMInsts[MI.getOpcode()].NumOperands;
+ for (unsigned i = 0; i < NumOps; ++i, ++I) {
+ if (OpInfo[i].isPredicate() ) {
+ I->setImm(CC);
+ ++I;
+ if (CC == ARMCC::AL)
+ I->setReg(0);
+ else
+ I->setReg(ARM::CPSR);
+ return;
+ }
+ }
+}
+
+DecodeStatus ThumbDisassembler::getInstruction(MCInst &MI, uint64_t &Size,
+ const MemoryObject &Region,
+ uint64_t Address,
+ raw_ostream &os,
+ raw_ostream &cs) const {
+ CommentStream = &cs;
+
+ uint8_t bytes[4];
+
+ assert((STI.getFeatureBits() & ARM::ModeThumb) &&
+ "Asked to disassemble in Thumb mode but Subtarget is in ARM mode!");
+
+ // We want to read exactly 2 bytes of data.
+ if (Region.readBytes(Address, 2, (uint8_t*)bytes, NULL) == -1) {
+ Size = 0;
+ return MCDisassembler::Fail;
+ }
+
+ uint16_t insn16 = (bytes[1] << 8) | bytes[0];
+ DecodeStatus result = decodeThumbInstruction16(MI, insn16, Address, this, STI);
+ if (result != MCDisassembler::Fail) {
+ Size = 2;
+ Check(result, AddThumbPredicate(MI));
+ return result;
+ }
+
+ MI.clear();
+ result = decodeThumbSBitInstruction16(MI, insn16, Address, this, STI);
+ if (result) {
+ Size = 2;
+ bool InITBlock = !ITBlock.empty();
+ Check(result, AddThumbPredicate(MI));
+ AddThumb1SBit(MI, InITBlock);
+ return result;
+ }
+
+ MI.clear();
+ result = decodeThumb2Instruction16(MI, insn16, Address, this, STI);
+ if (result != MCDisassembler::Fail) {
+ Size = 2;
+
+ // Nested IT blocks are UNPREDICTABLE. Must be checked before we add
+ // the Thumb predicate.
+ if (MI.getOpcode() == ARM::t2IT && !ITBlock.empty())
+ result = MCDisassembler::SoftFail;
+
+ Check(result, AddThumbPredicate(MI));
+
+ // If we find an IT instruction, we need to parse its condition
+ // code and mask operands so that we can apply them correctly
+ // to the subsequent instructions.
+ if (MI.getOpcode() == ARM::t2IT) {
+
+ // (3 - the number of trailing zeros) is the number of then / else.
+ unsigned firstcond = MI.getOperand(0).getImm();
+ unsigned Mask = MI.getOperand(1).getImm();
+ unsigned CondBit0 = Mask >> 4 & 1;
+ unsigned NumTZ = CountTrailingZeros_32(Mask);
+ assert(NumTZ <= 3 && "Invalid IT mask!");
+ for (unsigned Pos = 3, e = NumTZ; Pos > e; --Pos) {
+ bool T = ((Mask >> Pos) & 1) == CondBit0;
+ if (T)
+ ITBlock.insert(ITBlock.begin(), firstcond);
+ else
+ ITBlock.insert(ITBlock.begin(), firstcond ^ 1);
}
+
+ ITBlock.push_back(firstcond);
+ }
+
+ return result;
+ }
+
+ // We want to read exactly 4 bytes of data.
+ if (Region.readBytes(Address, 4, (uint8_t*)bytes, NULL) == -1) {
+ Size = 0;
+ return MCDisassembler::Fail;
+ }
+
+ uint32_t insn32 = (bytes[3] << 8) |
+ (bytes[2] << 0) |
+ (bytes[1] << 24) |
+ (bytes[0] << 16);
+ MI.clear();
+ result = decodeThumbInstruction32(MI, insn32, Address, this, STI);
+ if (result != MCDisassembler::Fail) {
+ Size = 4;
+ bool InITBlock = ITBlock.size();
+ Check(result, AddThumbPredicate(MI));
+ AddThumb1SBit(MI, InITBlock);
+ return result;
+ }
+
+ MI.clear();
+ result = decodeThumb2Instruction32(MI, insn32, Address, this, STI);
+ if (result != MCDisassembler::Fail) {
+ Size = 4;
+ Check(result, AddThumbPredicate(MI));
+ return result;
+ }
+
+ MI.clear();
+ result = decodeVFPInstruction32(MI, insn32, Address, this, STI);
+ if (result != MCDisassembler::Fail) {
+ Size = 4;
+ UpdateThumbVFPPredicate(MI);
+ return result;
+ }
+
+ MI.clear();
+ result = decodeNEONDupInstruction32(MI, insn32, Address, this, STI);
+ if (result != MCDisassembler::Fail) {
+ Size = 4;
+ Check(result, AddThumbPredicate(MI));
+ return result;
+ }
+
+ if (fieldFromInstruction32(insn32, 24, 8) == 0xF9) {
+ MI.clear();
+ uint32_t NEONLdStInsn = insn32;
+ NEONLdStInsn &= 0xF0FFFFFF;
+ NEONLdStInsn |= 0x04000000;
+ result = decodeNEONLoadStoreInstruction32(MI, NEONLdStInsn, Address, this, STI);
+ if (result != MCDisassembler::Fail) {
+ Size = 4;
+ Check(result, AddThumbPredicate(MI));
+ return result;
+ }
+ }
+
+ if (fieldFromInstruction32(insn32, 24, 4) == 0xF) {
+ MI.clear();
+ uint32_t NEONDataInsn = insn32;
+ NEONDataInsn &= 0xF0FFFFFF; // Clear bits 27-24
+ NEONDataInsn |= (NEONDataInsn & 0x10000000) >> 4; // Move bit 28 to bit 24
+ NEONDataInsn |= 0x12000000; // Set bits 28 and 25
+ result = decodeNEONDataInstruction32(MI, NEONDataInsn, Address, this, STI);
+ if (result != MCDisassembler::Fail) {
+ Size = 4;
+ Check(result, AddThumbPredicate(MI));
+ return result;
+ }
+ }
+
+ Size = 0;
+ return MCDisassembler::Fail;
+}
+
+
+extern "C" void LLVMInitializeARMDisassembler() {
+ TargetRegistry::RegisterMCDisassembler(TheARMTarget,
+ createARMDisassembler);
+ TargetRegistry::RegisterMCDisassembler(TheThumbTarget,
+ createThumbDisassembler);
+}
+
+static const unsigned GPRDecoderTable[] = {
+ ARM::R0, ARM::R1, ARM::R2, ARM::R3,
+ ARM::R4, ARM::R5, ARM::R6, ARM::R7,
+ ARM::R8, ARM::R9, ARM::R10, ARM::R11,
+ ARM::R12, ARM::SP, ARM::LR, ARM::PC
+};
+
+static DecodeStatus DecodeGPRRegisterClass(llvm::MCInst &Inst, unsigned RegNo,
+ uint64_t Address, const void *Decoder) {
+ if (RegNo > 15)
+ return MCDisassembler::Fail;
+
+ unsigned Register = GPRDecoderTable[RegNo];
+ Inst.addOperand(MCOperand::CreateReg(Register));
+ return MCDisassembler::Success;
+}
+
+static DecodeStatus
+DecodeGPRnopcRegisterClass(llvm::MCInst &Inst, unsigned RegNo,
+ uint64_t Address, const void *Decoder) {
+ if (RegNo == 15) return MCDisassembler::Fail;
+ return DecodeGPRRegisterClass(Inst, RegNo, Address, Decoder);
+}
+
+static DecodeStatus DecodetGPRRegisterClass(llvm::MCInst &Inst, unsigned RegNo,
+ uint64_t Address, const void *Decoder) {
+ if (RegNo > 7)
+ return MCDisassembler::Fail;
+ return DecodeGPRRegisterClass(Inst, RegNo, Address, Decoder);
+}
+
+static DecodeStatus DecodetcGPRRegisterClass(llvm::MCInst &Inst, unsigned RegNo,
+ uint64_t Address, const void *Decoder) {
+ unsigned Register = 0;
+ switch (RegNo) {
+ case 0:
+ Register = ARM::R0;
+ break;
+ case 1:
+ Register = ARM::R1;
+ break;
+ case 2:
+ Register = ARM::R2;
break;
case 3:
- switch (slice(insn, 7, 4)) {
- case 11:
- return ARM::LDRHT;
- case 13:
- return ARM::LDRSBT;
- case 15:
- return ARM::LDRSHT;
- default:
- break; // fallthrough
- }
+ Register = ARM::R3;
+ break;
+ case 9:
+ Register = ARM::R9;
+ break;
+ case 12:
+ Register = ARM::R12;
break;
default:
- break; // fallthrough
+ return MCDisassembler::Fail;
}
+
+ Inst.addOperand(MCOperand::CreateReg(Register));
+ return MCDisassembler::Success;
+}
+
+static DecodeStatus DecoderGPRRegisterClass(llvm::MCInst &Inst, unsigned RegNo,
+ uint64_t Address, const void *Decoder) {
+ if (RegNo == 13 || RegNo == 15) return MCDisassembler::Fail;
+ return DecodeGPRRegisterClass(Inst, RegNo, Address, Decoder);
+}
+
+static const unsigned SPRDecoderTable[] = {
+ ARM::S0, ARM::S1, ARM::S2, ARM::S3,
+ ARM::S4, ARM::S5, ARM::S6, ARM::S7,
+ ARM::S8, ARM::S9, ARM::S10, ARM::S11,
+ ARM::S12, ARM::S13, ARM::S14, ARM::S15,
+ ARM::S16, ARM::S17, ARM::S18, ARM::S19,
+ ARM::S20, ARM::S21, ARM::S22, ARM::S23,
+ ARM::S24, ARM::S25, ARM::S26, ARM::S27,
+ ARM::S28, ARM::S29, ARM::S30, ARM::S31
+};
+
+static DecodeStatus DecodeSPRRegisterClass(llvm::MCInst &Inst, unsigned RegNo,
+ uint64_t Address, const void *Decoder) {
+ if (RegNo > 31)
+ return MCDisassembler::Fail;
+
+ unsigned Register = SPRDecoderTable[RegNo];
+ Inst.addOperand(MCOperand::CreateReg(Register));
+ return MCDisassembler::Success;
+}
+
+static const unsigned DPRDecoderTable[] = {
+ ARM::D0, ARM::D1, ARM::D2, ARM::D3,
+ ARM::D4, ARM::D5, ARM::D6, ARM::D7,
+ ARM::D8, ARM::D9, ARM::D10, ARM::D11,
+ ARM::D12, ARM::D13, ARM::D14, ARM::D15,
+ ARM::D16, ARM::D17, ARM::D18, ARM::D19,
+ ARM::D20, ARM::D21, ARM::D22, ARM::D23,
+ ARM::D24, ARM::D25, ARM::D26, ARM::D27,
+ ARM::D28, ARM::D29, ARM::D30, ARM::D31
+};
+
+static DecodeStatus DecodeDPRRegisterClass(llvm::MCInst &Inst, unsigned RegNo,
+ uint64_t Address, const void *Decoder) {
+ if (RegNo > 31)
+ return MCDisassembler::Fail;
+
+ unsigned Register = DPRDecoderTable[RegNo];
+ Inst.addOperand(MCOperand::CreateReg(Register));
+ return MCDisassembler::Success;
+}
+
+static DecodeStatus DecodeDPR_8RegisterClass(llvm::MCInst &Inst, unsigned RegNo,
+ uint64_t Address, const void *Decoder) {
+ if (RegNo > 7)
+ return MCDisassembler::Fail;
+ return DecodeDPRRegisterClass(Inst, RegNo, Address, Decoder);
+}
+
+static DecodeStatus
+DecodeDPR_VFP2RegisterClass(llvm::MCInst &Inst, unsigned RegNo,
+ uint64_t Address, const void *Decoder) {
+ if (RegNo > 15)
+ return MCDisassembler::Fail;
+ return DecodeDPRRegisterClass(Inst, RegNo, Address, Decoder);
+}
+
+static const unsigned QPRDecoderTable[] = {
+ ARM::Q0, ARM::Q1, ARM::Q2, ARM::Q3,
+ ARM::Q4, ARM::Q5, ARM::Q6, ARM::Q7,
+ ARM::Q8, ARM::Q9, ARM::Q10, ARM::Q11,
+ ARM::Q12, ARM::Q13, ARM::Q14, ARM::Q15
+};
+
+
+static DecodeStatus DecodeQPRRegisterClass(llvm::MCInst &Inst, unsigned RegNo,
+ uint64_t Address, const void *Decoder) {
+ if (RegNo > 31)
+ return MCDisassembler::Fail;
+ RegNo >>= 1;
+
+ unsigned Register = QPRDecoderTable[RegNo];
+ Inst.addOperand(MCOperand::CreateReg(Register));
+ return MCDisassembler::Success;
+}
+
+static DecodeStatus DecodePredicateOperand(llvm::MCInst &Inst, unsigned Val,
+ uint64_t Address, const void *Decoder) {
+ if (Val == 0xF) return MCDisassembler::Fail;
+ // AL predicate is not allowed on Thumb1 branches.
+ if (Inst.getOpcode() == ARM::tBcc && Val == 0xE)
+ return MCDisassembler::Fail;
+ Inst.addOperand(MCOperand::CreateImm(Val));
+ if (Val == ARMCC::AL) {
+ Inst.addOperand(MCOperand::CreateReg(0));
+ } else
+ Inst.addOperand(MCOperand::CreateReg(ARM::CPSR));
+ return MCDisassembler::Success;
+}
+
+static DecodeStatus DecodeCCOutOperand(llvm::MCInst &Inst, unsigned Val,
+ uint64_t Address, const void *Decoder) {
+ if (Val)
+ Inst.addOperand(MCOperand::CreateReg(ARM::CPSR));
+ else
+ Inst.addOperand(MCOperand::CreateReg(0));
+ return MCDisassembler::Success;
+}
+
+static DecodeStatus DecodeSOImmOperand(llvm::MCInst &Inst, unsigned Val,
+ uint64_t Address, const void *Decoder) {
+ uint32_t imm = Val & 0xFF;
+ uint32_t rot = (Val & 0xF00) >> 7;
+ uint32_t rot_imm = (imm >> rot) | (imm << ((32-rot) & 0x1F));
+ Inst.addOperand(MCOperand::CreateImm(rot_imm));
+ return MCDisassembler::Success;
+}
+
+static DecodeStatus DecodeSORegImmOperand(llvm::MCInst &Inst, unsigned Val,
+ uint64_t Address, const void *Decoder) {
+ DecodeStatus S = MCDisassembler::Success;
+
+ unsigned Rm = fieldFromInstruction32(Val, 0, 4);
+ unsigned type = fieldFromInstruction32(Val, 5, 2);
+ unsigned imm = fieldFromInstruction32(Val, 7, 5);
+
+ // Register-immediate
+ if (!Check(S, DecodeGPRRegisterClass(Inst, Rm, Address, Decoder)))
+ return MCDisassembler::Fail;
+
+ ARM_AM::ShiftOpc Shift = ARM_AM::lsl;
+ switch (type) {
+ case 0:
+ Shift = ARM_AM::lsl;
+ break;
+ case 1:
+ Shift = ARM_AM::lsr;
+ break;
+ case 2:
+ Shift = ARM_AM::asr;
+ break;
+ case 3:
+ Shift = ARM_AM::ror;
+ break;
}
- // Ditto for SBCrs, which is a super-instruction for A8.6.152 & A8.6.153.
- // As a result, the decoder fails to decode STRH_Post/LDRD_POST/STRD_POST
- // properly.
- if (slice(insn, 27, 25) == 0 && slice(insn, 20, 20) == 0) {
- unsigned PW = slice(insn, 24, 24) << 1 | slice(insn, 21, 21);
- switch (slice(insn, 7, 4)) {
- case 11:
- switch (PW) {
- case 2: // Offset
- return ARM::STRH;
- case 3: // Pre-indexed
- return ARM::STRH_PRE;
- case 0: // Post-indexed
- return ARM::STRH_POST;
- default:
- break; // fallthrough
- }
+ if (Shift == ARM_AM::ror && imm == 0)
+ Shift = ARM_AM::rrx;
+
+ unsigned Op = Shift | (imm << 3);
+ Inst.addOperand(MCOperand::CreateImm(Op));
+
+ return S;
+}
+
+static DecodeStatus DecodeSORegRegOperand(llvm::MCInst &Inst, unsigned Val,
+ uint64_t Address, const void *Decoder) {
+ DecodeStatus S = MCDisassembler::Success;
+
+ unsigned Rm = fieldFromInstruction32(Val, 0, 4);
+ unsigned type = fieldFromInstruction32(Val, 5, 2);
+ unsigned Rs = fieldFromInstruction32(Val, 8, 4);
+
+ // Register-register
+ if (!Check(S, DecodeGPRnopcRegisterClass(Inst, Rm, Address, Decoder)))
+ return MCDisassembler::Fail;
+ if (!Check(S, DecodeGPRnopcRegisterClass(Inst, Rs, Address, Decoder)))
+ return MCDisassembler::Fail;
+
+ ARM_AM::ShiftOpc Shift = ARM_AM::lsl;
+ switch (type) {
+ case 0:
+ Shift = ARM_AM::lsl;
break;
- case 13:
- switch (PW) {
- case 2: // Offset
- return ARM::LDRD;
- case 3: // Pre-indexed
- return ARM::LDRD_PRE;
- case 0: // Post-indexed
- return ARM::LDRD_POST;
- default:
- break; // fallthrough
- }
+ case 1:
+ Shift = ARM_AM::lsr;
break;
- case 15:
- switch (PW) {
- case 2: // Offset
- return ARM::STRD;
- case 3: // Pre-indexed
- return ARM::STRD_PRE;
- case 0: // Post-indexed
- return ARM::STRD_POST;
- default:
- break; // fallthrough
- }
+ case 2:
+ Shift = ARM_AM::asr;
break;
+ case 3:
+ Shift = ARM_AM::ror;
+ break;
+ }
+
+ Inst.addOperand(MCOperand::CreateImm(Shift));
+
+ return S;
+}
+
+static DecodeStatus DecodeRegListOperand(llvm::MCInst &Inst, unsigned Val,
+ uint64_t Address, const void *Decoder) {
+ DecodeStatus S = MCDisassembler::Success;
+
+ bool writebackLoad = false;
+ unsigned writebackReg = 0;
+ switch (Inst.getOpcode()) {
default:
- break; // fallthrough
+ break;
+ case ARM::LDMIA_UPD:
+ case ARM::LDMDB_UPD:
+ case ARM::LDMIB_UPD:
+ case ARM::LDMDA_UPD:
+ case ARM::t2LDMIA_UPD:
+ case ARM::t2LDMDB_UPD:
+ writebackLoad = true;
+ writebackReg = Inst.getOperand(0).getReg();
+ break;
+ }
+
+ // Empty register lists are not allowed.
+ if (CountPopulation_32(Val) == 0) return MCDisassembler::Fail;
+ for (unsigned i = 0; i < 16; ++i) {
+ if (Val & (1 << i)) {
+ if (!Check(S, DecodeGPRRegisterClass(Inst, i, Address, Decoder)))
+ return MCDisassembler::Fail;
+ // Writeback not allowed if Rn is in the target list.
+ if (writebackLoad && writebackReg == Inst.end()[-1].getReg())
+ Check(S, MCDisassembler::SoftFail);
}
}
- // Ditto for SBCSSrs, which is a super-instruction for A8.6.152 & A8.6.153.
- // As a result, the decoder fails to decode LDRH_POST/LDRSB_POST/LDRSH_POST
- // properly.
- if (slice(insn, 27, 25) == 0 && slice(insn, 20, 20) == 1) {
- unsigned PW = slice(insn, 24, 24) << 1 | slice(insn, 21, 21);
- switch (slice(insn, 7, 4)) {
- case 11:
- switch (PW) {
- case 2: // Offset
- return ARM::LDRH;
- case 3: // Pre-indexed
- return ARM::LDRH_PRE;
- case 0: // Post-indexed
- return ARM::LDRH_POST;
- default:
- break; // fallthrough
- }
+ return S;
+}
+
+static DecodeStatus DecodeSPRRegListOperand(llvm::MCInst &Inst, unsigned Val,
+ uint64_t Address, const void *Decoder) {
+ DecodeStatus S = MCDisassembler::Success;
+
+ unsigned Vd = fieldFromInstruction32(Val, 8, 4);
+ unsigned regs = Val & 0xFF;
+
+ if (!Check(S, DecodeSPRRegisterClass(Inst, Vd, Address, Decoder)))
+ return MCDisassembler::Fail;
+ for (unsigned i = 0; i < (regs - 1); ++i) {
+ if (!Check(S, DecodeSPRRegisterClass(Inst, ++Vd, Address, Decoder)))
+ return MCDisassembler::Fail;
+ }
+
+ return S;
+}
+
+static DecodeStatus DecodeDPRRegListOperand(llvm::MCInst &Inst, unsigned Val,
+ uint64_t Address, const void *Decoder) {
+ DecodeStatus S = MCDisassembler::Success;
+
+ unsigned Vd = fieldFromInstruction32(Val, 8, 4);
+ unsigned regs = (Val & 0xFF) / 2;
+
+ if (!Check(S, DecodeDPRRegisterClass(Inst, Vd, Address, Decoder)))
+ return MCDisassembler::Fail;
+ for (unsigned i = 0; i < (regs - 1); ++i) {
+ if (!Check(S, DecodeDPRRegisterClass(Inst, ++Vd, Address, Decoder)))
+ return MCDisassembler::Fail;
+ }
+
+ return S;
+}
+
+static DecodeStatus DecodeBitfieldMaskOperand(llvm::MCInst &Inst, unsigned Val,
+ uint64_t Address, const void *Decoder) {
+ // This operand encodes a mask of contiguous zeros between a specified MSB
+ // and LSB. To decode it, we create the mask of all bits MSB-and-lower,
+ // the mask of all bits LSB-and-lower, and then xor them to create
+ // the mask of that's all ones on [msb, lsb]. Finally we not it to
+ // create the final mask.
+ unsigned msb = fieldFromInstruction32(Val, 5, 5);
+ unsigned lsb = fieldFromInstruction32(Val, 0, 5);
+
+ DecodeStatus S = MCDisassembler::Success;
+ if (lsb > msb) Check(S, MCDisassembler::SoftFail);
+
+ uint32_t msb_mask = 0xFFFFFFFF;
+ if (msb != 31) msb_mask = (1U << (msb+1)) - 1;
+ uint32_t lsb_mask = (1U << lsb) - 1;
+
+ Inst.addOperand(MCOperand::CreateImm(~(msb_mask ^ lsb_mask)));
+ return S;
+}
+
+static DecodeStatus DecodeCopMemInstruction(llvm::MCInst &Inst, unsigned Insn,
+ uint64_t Address, const void *Decoder) {
+ DecodeStatus S = MCDisassembler::Success;
+
+ unsigned pred = fieldFromInstruction32(Insn, 28, 4);
+ unsigned CRd = fieldFromInstruction32(Insn, 12, 4);
+ unsigned coproc = fieldFromInstruction32(Insn, 8, 4);
+ unsigned imm = fieldFromInstruction32(Insn, 0, 8);
+ unsigned Rn = fieldFromInstruction32(Insn, 16, 4);
+ unsigned U = fieldFromInstruction32(Insn, 23, 1);
+
+ switch (Inst.getOpcode()) {
+ case ARM::LDC_OFFSET:
+ case ARM::LDC_PRE:
+ case ARM::LDC_POST:
+ case ARM::LDC_OPTION:
+ case ARM::LDCL_OFFSET:
+ case ARM::LDCL_PRE:
+ case ARM::LDCL_POST:
+ case ARM::LDCL_OPTION:
+ case ARM::STC_OFFSET:
+ case ARM::STC_PRE:
+ case ARM::STC_POST:
+ case ARM::STC_OPTION:
+ case ARM::STCL_OFFSET:
+ case ARM::STCL_PRE:
+ case ARM::STCL_POST:
+ case ARM::STCL_OPTION:
+ case ARM::t2LDC_OFFSET:
+ case ARM::t2LDC_PRE:
+ case ARM::t2LDC_POST:
+ case ARM::t2LDC_OPTION:
+ case ARM::t2LDCL_OFFSET:
+ case ARM::t2LDCL_PRE:
+ case ARM::t2LDCL_POST:
+ case ARM::t2LDCL_OPTION:
+ case ARM::t2STC_OFFSET:
+ case ARM::t2STC_PRE:
+ case ARM::t2STC_POST:
+ case ARM::t2STC_OPTION:
+ case ARM::t2STCL_OFFSET:
+ case ARM::t2STCL_PRE:
+ case ARM::t2STCL_POST:
+ case ARM::t2STCL_OPTION:
+ if (coproc == 0xA || coproc == 0xB)
+ return MCDisassembler::Fail;
+ break;
+ default:
break;
- case 13:
- switch (PW) {
- case 2: // Offset
- return ARM::LDRSB;
- case 3: // Pre-indexed
- return ARM::LDRSB_PRE;
- case 0: // Post-indexed
- return ARM::LDRSB_POST;
+ }
+
+ Inst.addOperand(MCOperand::CreateImm(coproc));
+ Inst.addOperand(MCOperand::CreateImm(CRd));
+ if (!Check(S, DecodeGPRRegisterClass(Inst, Rn, Address, Decoder)))
+ return MCDisassembler::Fail;
+
+ unsigned P = fieldFromInstruction32(Insn, 24, 1);
+ unsigned W = fieldFromInstruction32(Insn, 21, 1);
+
+ bool writeback = (P == 0) || (W == 1);
+ unsigned idx_mode = 0;
+ if (P && writeback)
+ idx_mode = ARMII::IndexModePre;
+ else if (!P && writeback)
+ idx_mode = ARMII::IndexModePost;
+
+ switch (Inst.getOpcode()) {
+ case ARM::t2LDC2_OFFSET:
+ case ARM::t2LDC2L_OFFSET:
+ case ARM::t2LDC2_PRE:
+ case ARM::t2LDC2L_PRE:
+ case ARM::t2STC2_OFFSET:
+ case ARM::t2STC2L_OFFSET:
+ case ARM::t2STC2_PRE:
+ case ARM::t2STC2L_PRE:
+ case ARM::LDC2_OFFSET:
+ case ARM::LDC2L_OFFSET:
+ case ARM::LDC2_PRE:
+ case ARM::LDC2L_PRE:
+ case ARM::STC2_OFFSET:
+ case ARM::STC2L_OFFSET:
+ case ARM::STC2_PRE:
+ case ARM::STC2L_PRE:
+ case ARM::t2LDC_OFFSET:
+ case ARM::t2LDCL_OFFSET:
+ case ARM::t2LDC_PRE:
+ case ARM::t2LDCL_PRE:
+ case ARM::t2STC_OFFSET:
+ case ARM::t2STCL_OFFSET:
+ case ARM::t2STC_PRE:
+ case ARM::t2STCL_PRE:
+ case ARM::LDC_OFFSET:
+ case ARM::LDCL_OFFSET:
+ case ARM::LDC_PRE:
+ case ARM::LDCL_PRE:
+ case ARM::STC_OFFSET:
+ case ARM::STCL_OFFSET:
+ case ARM::STC_PRE:
+ case ARM::STCL_PRE:
+ imm = ARM_AM::getAM5Opc(U ? ARM_AM::add : ARM_AM::sub, imm);
+ Inst.addOperand(MCOperand::CreateImm(imm));
+ break;
+ case ARM::t2LDC2_POST:
+ case ARM::t2LDC2L_POST:
+ case ARM::t2STC2_POST:
+ case ARM::t2STC2L_POST:
+ case ARM::LDC2_POST:
+ case ARM::LDC2L_POST:
+ case ARM::STC2_POST:
+ case ARM::STC2L_POST:
+ case ARM::t2LDC_POST:
+ case ARM::t2LDCL_POST:
+ case ARM::t2STC_POST:
+ case ARM::t2STCL_POST:
+ case ARM::LDC_POST:
+ case ARM::LDCL_POST:
+ case ARM::STC_POST:
+ case ARM::STCL_POST:
+ imm |= U << 8;
+ // fall through.
+ default:
+ // The 'option' variant doesn't encode 'U' in the immediate since
+ // the immediate is unsigned [0,255].
+ Inst.addOperand(MCOperand::CreateImm(imm));
+ break;
+ }
+
+ switch (Inst.getOpcode()) {
+ case ARM::LDC_OFFSET:
+ case ARM::LDC_PRE:
+ case ARM::LDC_POST:
+ case ARM::LDC_OPTION:
+ case ARM::LDCL_OFFSET:
+ case ARM::LDCL_PRE:
+ case ARM::LDCL_POST:
+ case ARM::LDCL_OPTION:
+ case ARM::STC_OFFSET:
+ case ARM::STC_PRE:
+ case ARM::STC_POST:
+ case ARM::STC_OPTION:
+ case ARM::STCL_OFFSET:
+ case ARM::STCL_PRE:
+ case ARM::STCL_POST:
+ case ARM::STCL_OPTION:
+ if (!Check(S, DecodePredicateOperand(Inst, pred, Address, Decoder)))
+ return MCDisassembler::Fail;
+ break;
+ default:
+ break;
+ }
+
+ return S;
+}
+
+static DecodeStatus
+DecodeAddrMode2IdxInstruction(llvm::MCInst &Inst, unsigned Insn,
+ uint64_t Address, const void *Decoder) {
+ DecodeStatus S = MCDisassembler::Success;
+
+ unsigned Rn = fieldFromInstruction32(Insn, 16, 4);
+ unsigned Rt = fieldFromInstruction32(Insn, 12, 4);
+ unsigned Rm = fieldFromInstruction32(Insn, 0, 4);
+ unsigned imm = fieldFromInstruction32(Insn, 0, 12);
+ unsigned pred = fieldFromInstruction32(Insn, 28, 4);
+ unsigned reg = fieldFromInstruction32(Insn, 25, 1);
+ unsigned P = fieldFromInstruction32(Insn, 24, 1);
+ unsigned W = fieldFromInstruction32(Insn, 21, 1);
+
+ // On stores, the writeback operand precedes Rt.
+ switch (Inst.getOpcode()) {
+ case ARM::STR_POST_IMM:
+ case ARM::STR_POST_REG:
+ case ARM::STRB_POST_IMM:
+ case ARM::STRB_POST_REG:
+ case ARM::STRT_POST_REG:
+ case ARM::STRT_POST_IMM:
+ case ARM::STRBT_POST_REG:
+ case ARM::STRBT_POST_IMM:
+ if (!Check(S, DecodeGPRRegisterClass(Inst, Rn, Address, Decoder)))
+ return MCDisassembler::Fail;
+ break;
+ default:
+ break;
+ }
+
+ if (!Check(S, DecodeGPRRegisterClass(Inst, Rt, Address, Decoder)))
+ return MCDisassembler::Fail;
+
+ // On loads, the writeback operand comes after Rt.
+ switch (Inst.getOpcode()) {
+ case ARM::LDR_POST_IMM:
+ case ARM::LDR_POST_REG:
+ case ARM::LDRB_POST_IMM:
+ case ARM::LDRB_POST_REG:
+ case ARM::LDRBT_POST_REG:
+ case ARM::LDRBT_POST_IMM:
+ case ARM::LDRT_POST_REG:
+ case ARM::LDRT_POST_IMM:
+ if (!Check(S, DecodeGPRRegisterClass(Inst, Rn, Address, Decoder)))
+ return MCDisassembler::Fail;
+ break;
+ default:
+ break;
+ }
+
+ if (!Check(S, DecodeGPRRegisterClass(Inst, Rn, Address, Decoder)))
+ return MCDisassembler::Fail;
+
+ ARM_AM::AddrOpc Op = ARM_AM::add;
+ if (!fieldFromInstruction32(Insn, 23, 1))
+ Op = ARM_AM::sub;
+
+ bool writeback = (P == 0) || (W == 1);
+ unsigned idx_mode = 0;
+ if (P && writeback)
+ idx_mode = ARMII::IndexModePre;
+ else if (!P && writeback)
+ idx_mode = ARMII::IndexModePost;
+
+ if (writeback && (Rn == 15 || Rn == Rt))
+ S = MCDisassembler::SoftFail; // UNPREDICTABLE
+
+ if (reg) {
+ if (!Check(S, DecodeGPRnopcRegisterClass(Inst, Rm, Address, Decoder)))
+ return MCDisassembler::Fail;
+ ARM_AM::ShiftOpc Opc = ARM_AM::lsl;
+ switch( fieldFromInstruction32(Insn, 5, 2)) {
+ case 0:
+ Opc = ARM_AM::lsl;
+ break;
+ case 1:
+ Opc = ARM_AM::lsr;
+ break;
+ case 2:
+ Opc = ARM_AM::asr;
+ break;
+ case 3:
+ Opc = ARM_AM::ror;
+ break;
default:
- break; // fallthrough
- }
+ return MCDisassembler::Fail;
+ }
+ unsigned amt = fieldFromInstruction32(Insn, 7, 5);
+ unsigned imm = ARM_AM::getAM2Opc(Op, amt, Opc, idx_mode);
+
+ Inst.addOperand(MCOperand::CreateImm(imm));
+ } else {
+ Inst.addOperand(MCOperand::CreateReg(0));
+ unsigned tmp = ARM_AM::getAM2Opc(Op, imm, ARM_AM::lsl, idx_mode);
+ Inst.addOperand(MCOperand::CreateImm(tmp));
+ }
+
+ if (!Check(S, DecodePredicateOperand(Inst, pred, Address, Decoder)))
+ return MCDisassembler::Fail;
+
+ return S;
+}
+
+static DecodeStatus DecodeSORegMemOperand(llvm::MCInst &Inst, unsigned Val,
+ uint64_t Address, const void *Decoder) {
+ DecodeStatus S = MCDisassembler::Success;
+
+ unsigned Rn = fieldFromInstruction32(Val, 13, 4);
+ unsigned Rm = fieldFromInstruction32(Val, 0, 4);
+ unsigned type = fieldFromInstruction32(Val, 5, 2);
+ unsigned imm = fieldFromInstruction32(Val, 7, 5);
+ unsigned U = fieldFromInstruction32(Val, 12, 1);
+
+ ARM_AM::ShiftOpc ShOp = ARM_AM::lsl;
+ switch (type) {
+ case 0:
+ ShOp = ARM_AM::lsl;
+ break;
+ case 1:
+ ShOp = ARM_AM::lsr;
+ break;
+ case 2:
+ ShOp = ARM_AM::asr;
+ break;
+ case 3:
+ ShOp = ARM_AM::ror;
+ break;
+ }
+
+ if (!Check(S, DecodeGPRRegisterClass(Inst, Rn, Address, Decoder)))
+ return MCDisassembler::Fail;
+ if (!Check(S, DecodeGPRRegisterClass(Inst, Rm, Address, Decoder)))
+ return MCDisassembler::Fail;
+ unsigned shift;
+ if (U)
+ shift = ARM_AM::getAM2Opc(ARM_AM::add, imm, ShOp);
+ else
+ shift = ARM_AM::getAM2Opc(ARM_AM::sub, imm, ShOp);
+ Inst.addOperand(MCOperand::CreateImm(shift));
+
+ return S;
+}
+
+static DecodeStatus
+DecodeAddrMode3Instruction(llvm::MCInst &Inst, unsigned Insn,
+ uint64_t Address, const void *Decoder) {
+ DecodeStatus S = MCDisassembler::Success;
+
+ unsigned Rt = fieldFromInstruction32(Insn, 12, 4);
+ unsigned Rn = fieldFromInstruction32(Insn, 16, 4);
+ unsigned Rm = fieldFromInstruction32(Insn, 0, 4);
+ unsigned type = fieldFromInstruction32(Insn, 22, 1);
+ unsigned imm = fieldFromInstruction32(Insn, 8, 4);
+ unsigned U = ((~fieldFromInstruction32(Insn, 23, 1)) & 1) << 8;
+ unsigned pred = fieldFromInstruction32(Insn, 28, 4);
+ unsigned W = fieldFromInstruction32(Insn, 21, 1);
+ unsigned P = fieldFromInstruction32(Insn, 24, 1);
+
+ bool writeback = (W == 1) | (P == 0);
+
+ // For {LD,ST}RD, Rt must be even, else undefined.
+ switch (Inst.getOpcode()) {
+ case ARM::STRD:
+ case ARM::STRD_PRE:
+ case ARM::STRD_POST:
+ case ARM::LDRD:
+ case ARM::LDRD_PRE:
+ case ARM::LDRD_POST:
+ if (Rt & 0x1) return MCDisassembler::Fail;
+ break;
+ default:
+ break;
+ }
+
+ if (writeback) { // Writeback
+ if (P)
+ U |= ARMII::IndexModePre << 9;
+ else
+ U |= ARMII::IndexModePost << 9;
+
+ // On stores, the writeback operand precedes Rt.
+ switch (Inst.getOpcode()) {
+ case ARM::STRD:
+ case ARM::STRD_PRE:
+ case ARM::STRD_POST:
+ case ARM::STRH:
+ case ARM::STRH_PRE:
+ case ARM::STRH_POST:
+ if (!Check(S, DecodeGPRRegisterClass(Inst, Rn, Address, Decoder)))
+ return MCDisassembler::Fail;
+ break;
+ default:
+ break;
+ }
+ }
+
+ if (!Check(S, DecodeGPRRegisterClass(Inst, Rt, Address, Decoder)))
+ return MCDisassembler::Fail;
+ switch (Inst.getOpcode()) {
+ case ARM::STRD:
+ case ARM::STRD_PRE:
+ case ARM::STRD_POST:
+ case ARM::LDRD:
+ case ARM::LDRD_PRE:
+ case ARM::LDRD_POST:
+ if (!Check(S, DecodeGPRRegisterClass(Inst, Rt+1, Address, Decoder)))
+ return MCDisassembler::Fail;
break;
- case 15:
- switch (PW) {
- case 2: // Offset
- return ARM::LDRSH;
- case 3: // Pre-indexed
- return ARM::LDRSH_PRE;
- case 0: // Post-indexed
- return ARM::LDRSH_POST;
+ default:
+ break;
+ }
+
+ if (writeback) {
+ // On loads, the writeback operand comes after Rt.
+ switch (Inst.getOpcode()) {
+ case ARM::LDRD:
+ case ARM::LDRD_PRE:
+ case ARM::LDRD_POST:
+ case ARM::LDRH:
+ case ARM::LDRH_PRE:
+ case ARM::LDRH_POST:
+ case ARM::LDRSH:
+ case ARM::LDRSH_PRE:
+ case ARM::LDRSH_POST:
+ case ARM::LDRSB:
+ case ARM::LDRSB_PRE:
+ case ARM::LDRSB_POST:
+ case ARM::LDRHTr:
+ case ARM::LDRSBTr:
+ if (!Check(S, DecodeGPRRegisterClass(Inst, Rn, Address, Decoder)))
+ return MCDisassembler::Fail;
+ break;
+ default:
+ break;
+ }
+ }
+
+ if (!Check(S, DecodeGPRRegisterClass(Inst, Rn, Address, Decoder)))
+ return MCDisassembler::Fail;
+
+ if (type) {
+ Inst.addOperand(MCOperand::CreateReg(0));
+ Inst.addOperand(MCOperand::CreateImm(U | (imm << 4) | Rm));
+ } else {
+ if (!Check(S, DecodeGPRRegisterClass(Inst, Rm, Address, Decoder)))
+ return MCDisassembler::Fail;
+ Inst.addOperand(MCOperand::CreateImm(U));
+ }
+
+ if (!Check(S, DecodePredicateOperand(Inst, pred, Address, Decoder)))
+ return MCDisassembler::Fail;
+
+ return S;
+}
+
+static DecodeStatus DecodeRFEInstruction(llvm::MCInst &Inst, unsigned Insn,
+ uint64_t Address, const void *Decoder) {
+ DecodeStatus S = MCDisassembler::Success;
+
+ unsigned Rn = fieldFromInstruction32(Insn, 16, 4);
+ unsigned mode = fieldFromInstruction32(Insn, 23, 2);
+
+ switch (mode) {
+ case 0:
+ mode = ARM_AM::da;
+ break;
+ case 1:
+ mode = ARM_AM::ia;
+ break;
+ case 2:
+ mode = ARM_AM::db;
+ break;
+ case 3:
+ mode = ARM_AM::ib;
+ break;
+ }
+
+ Inst.addOperand(MCOperand::CreateImm(mode));
+ if (!Check(S, DecodeGPRRegisterClass(Inst, Rn, Address, Decoder)))
+ return MCDisassembler::Fail;
+
+ return S;
+}
+
+static DecodeStatus DecodeMemMultipleWritebackInstruction(llvm::MCInst &Inst,
+ unsigned Insn,
+ uint64_t Address, const void *Decoder) {
+ DecodeStatus S = MCDisassembler::Success;
+
+ unsigned Rn = fieldFromInstruction32(Insn, 16, 4);
+ unsigned pred = fieldFromInstruction32(Insn, 28, 4);
+ unsigned reglist = fieldFromInstruction32(Insn, 0, 16);
+
+ if (pred == 0xF) {
+ switch (Inst.getOpcode()) {
+ case ARM::LDMDA:
+ Inst.setOpcode(ARM::RFEDA);
+ break;
+ case ARM::LDMDA_UPD:
+ Inst.setOpcode(ARM::RFEDA_UPD);
+ break;
+ case ARM::LDMDB:
+ Inst.setOpcode(ARM::RFEDB);
+ break;
+ case ARM::LDMDB_UPD:
+ Inst.setOpcode(ARM::RFEDB_UPD);
+ break;
+ case ARM::LDMIA:
+ Inst.setOpcode(ARM::RFEIA);
+ break;
+ case ARM::LDMIA_UPD:
+ Inst.setOpcode(ARM::RFEIA_UPD);
+ break;
+ case ARM::LDMIB:
+ Inst.setOpcode(ARM::RFEIB);
+ break;
+ case ARM::LDMIB_UPD:
+ Inst.setOpcode(ARM::RFEIB_UPD);
+ break;
+ case ARM::STMDA:
+ Inst.setOpcode(ARM::SRSDA);
+ break;
+ case ARM::STMDA_UPD:
+ Inst.setOpcode(ARM::SRSDA_UPD);
+ break;
+ case ARM::STMDB:
+ Inst.setOpcode(ARM::SRSDB);
+ break;
+ case ARM::STMDB_UPD:
+ Inst.setOpcode(ARM::SRSDB_UPD);
+ break;
+ case ARM::STMIA:
+ Inst.setOpcode(ARM::SRSIA);
+ break;
+ case ARM::STMIA_UPD:
+ Inst.setOpcode(ARM::SRSIA_UPD);
+ break;
+ case ARM::STMIB:
+ Inst.setOpcode(ARM::SRSIB);
+ break;
+ case ARM::STMIB_UPD:
+ Inst.setOpcode(ARM::SRSIB_UPD);
+ break;
default:
- break; // fallthrough
- }
+ if (!Check(S, MCDisassembler::Fail)) return MCDisassembler::Fail;
+ }
+
+ // For stores (which become SRS's, the only operand is the mode.
+ if (fieldFromInstruction32(Insn, 20, 1) == 0) {
+ Inst.addOperand(
+ MCOperand::CreateImm(fieldFromInstruction32(Insn, 0, 4)));
+ return S;
+ }
+
+ return DecodeRFEInstruction(Inst, Insn, Address, Decoder);
+ }
+
+ if (!Check(S, DecodeGPRRegisterClass(Inst, Rn, Address, Decoder)))
+ return MCDisassembler::Fail;
+ if (!Check(S, DecodeGPRRegisterClass(Inst, Rn, Address, Decoder)))
+ return MCDisassembler::Fail; // Tied
+ if (!Check(S, DecodePredicateOperand(Inst, pred, Address, Decoder)))
+ return MCDisassembler::Fail;
+ if (!Check(S, DecodeRegListOperand(Inst, reglist, Address, Decoder)))
+ return MCDisassembler::Fail;
+
+ return S;
+}
+
+static DecodeStatus DecodeCPSInstruction(llvm::MCInst &Inst, unsigned Insn,
+ uint64_t Address, const void *Decoder) {
+ unsigned imod = fieldFromInstruction32(Insn, 18, 2);
+ unsigned M = fieldFromInstruction32(Insn, 17, 1);
+ unsigned iflags = fieldFromInstruction32(Insn, 6, 3);
+ unsigned mode = fieldFromInstruction32(Insn, 0, 5);
+
+ DecodeStatus S = MCDisassembler::Success;
+
+ // imod == '01' --> UNPREDICTABLE
+ // NOTE: Even though this is technically UNPREDICTABLE, we choose to
+ // return failure here. The '01' imod value is unprintable, so there's
+ // nothing useful we could do even if we returned UNPREDICTABLE.
+
+ if (imod == 1) return MCDisassembler::Fail;
+
+ if (imod && M) {
+ Inst.setOpcode(ARM::CPS3p);
+ Inst.addOperand(MCOperand::CreateImm(imod));
+ Inst.addOperand(MCOperand::CreateImm(iflags));
+ Inst.addOperand(MCOperand::CreateImm(mode));
+ } else if (imod && !M) {
+ Inst.setOpcode(ARM::CPS2p);
+ Inst.addOperand(MCOperand::CreateImm(imod));
+ Inst.addOperand(MCOperand::CreateImm(iflags));
+ if (mode) S = MCDisassembler::SoftFail;
+ } else if (!imod && M) {
+ Inst.setOpcode(ARM::CPS1p);
+ Inst.addOperand(MCOperand::CreateImm(mode));
+ if (iflags) S = MCDisassembler::SoftFail;
+ } else {
+ // imod == '00' && M == '0' --> UNPREDICTABLE
+ Inst.setOpcode(ARM::CPS1p);
+ Inst.addOperand(MCOperand::CreateImm(mode));
+ S = MCDisassembler::SoftFail;
+ }
+
+ return S;
+}
+
+static DecodeStatus DecodeT2CPSInstruction(llvm::MCInst &Inst, unsigned Insn,
+ uint64_t Address, const void *Decoder) {
+ unsigned imod = fieldFromInstruction32(Insn, 9, 2);
+ unsigned M = fieldFromInstruction32(Insn, 8, 1);
+ unsigned iflags = fieldFromInstruction32(Insn, 5, 3);
+ unsigned mode = fieldFromInstruction32(Insn, 0, 5);
+
+ DecodeStatus S = MCDisassembler::Success;
+
+ // imod == '01' --> UNPREDICTABLE
+ // NOTE: Even though this is technically UNPREDICTABLE, we choose to
+ // return failure here. The '01' imod value is unprintable, so there's
+ // nothing useful we could do even if we returned UNPREDICTABLE.
+
+ if (imod == 1) return MCDisassembler::Fail;
+
+ if (imod && M) {
+ Inst.setOpcode(ARM::t2CPS3p);
+ Inst.addOperand(MCOperand::CreateImm(imod));
+ Inst.addOperand(MCOperand::CreateImm(iflags));
+ Inst.addOperand(MCOperand::CreateImm(mode));
+ } else if (imod && !M) {
+ Inst.setOpcode(ARM::t2CPS2p);
+ Inst.addOperand(MCOperand::CreateImm(imod));
+ Inst.addOperand(MCOperand::CreateImm(iflags));
+ if (mode) S = MCDisassembler::SoftFail;
+ } else if (!imod && M) {
+ Inst.setOpcode(ARM::t2CPS1p);
+ Inst.addOperand(MCOperand::CreateImm(mode));
+ if (iflags) S = MCDisassembler::SoftFail;
+ } else {
+ // imod == '00' && M == '0' --> UNPREDICTABLE
+ Inst.setOpcode(ARM::t2CPS1p);
+ Inst.addOperand(MCOperand::CreateImm(mode));
+ S = MCDisassembler::SoftFail;
+ }
+
+ return S;
+}
+
+static DecodeStatus DecodeT2MOVTWInstruction(llvm::MCInst &Inst, unsigned Insn,
+ uint64_t Address, const void *Decoder) {
+ DecodeStatus S = MCDisassembler::Success;
+
+ unsigned Rd = fieldFromInstruction32(Insn, 8, 4);
+ unsigned imm = 0;
+
+ imm |= (fieldFromInstruction32(Insn, 0, 8) << 0);
+ imm |= (fieldFromInstruction32(Insn, 12, 3) << 8);
+ imm |= (fieldFromInstruction32(Insn, 16, 4) << 12);
+ imm |= (fieldFromInstruction32(Insn, 26, 1) << 11);
+
+ if (Inst.getOpcode() == ARM::t2MOVTi16)
+ if (!Check(S, DecoderGPRRegisterClass(Inst, Rd, Address, Decoder)))
+ return MCDisassembler::Fail;
+ if (!Check(S, DecoderGPRRegisterClass(Inst, Rd, Address, Decoder)))
+ return MCDisassembler::Fail;
+
+ if (!tryAddingSymbolicOperand(Address, imm, false, 4, Inst, Decoder))
+ Inst.addOperand(MCOperand::CreateImm(imm));
+
+ return S;
+}
+
+static DecodeStatus DecodeArmMOVTWInstruction(llvm::MCInst &Inst, unsigned Insn,
+ uint64_t Address, const void *Decoder) {
+ DecodeStatus S = MCDisassembler::Success;
+
+ unsigned Rd = fieldFromInstruction32(Insn, 12, 4);
+ unsigned pred = fieldFromInstruction32(Insn, 28, 4);
+ unsigned imm = 0;
+
+ imm |= (fieldFromInstruction32(Insn, 0, 12) << 0);
+ imm |= (fieldFromInstruction32(Insn, 16, 4) << 12);
+
+ if (Inst.getOpcode() == ARM::MOVTi16)
+ if (!Check(S, DecoderGPRRegisterClass(Inst, Rd, Address, Decoder)))
+ return MCDisassembler::Fail;
+ if (!Check(S, DecoderGPRRegisterClass(Inst, Rd, Address, Decoder)))
+ return MCDisassembler::Fail;
+
+ if (!tryAddingSymbolicOperand(Address, imm, false, 4, Inst, Decoder))
+ Inst.addOperand(MCOperand::CreateImm(imm));
+
+ if (!Check(S, DecodePredicateOperand(Inst, pred, Address, Decoder)))
+ return MCDisassembler::Fail;
+
+ return S;
+}
+
+static DecodeStatus DecodeSMLAInstruction(llvm::MCInst &Inst, unsigned Insn,
+ uint64_t Address, const void *Decoder) {
+ DecodeStatus S = MCDisassembler::Success;
+
+ unsigned Rd = fieldFromInstruction32(Insn, 16, 4);
+ unsigned Rn = fieldFromInstruction32(Insn, 0, 4);
+ unsigned Rm = fieldFromInstruction32(Insn, 8, 4);
+ unsigned Ra = fieldFromInstruction32(Insn, 12, 4);
+ unsigned pred = fieldFromInstruction32(Insn, 28, 4);
+
+ if (pred == 0xF)
+ return DecodeCPSInstruction(Inst, Insn, Address, Decoder);
+
+ if (!Check(S, DecodeGPRnopcRegisterClass(Inst, Rd, Address, Decoder)))
+ return MCDisassembler::Fail;
+ if (!Check(S, DecodeGPRnopcRegisterClass(Inst, Rn, Address, Decoder)))
+ return MCDisassembler::Fail;
+ if (!Check(S, DecodeGPRnopcRegisterClass(Inst, Rm, Address, Decoder)))
+ return MCDisassembler::Fail;
+ if (!Check(S, DecodeGPRnopcRegisterClass(Inst, Ra, Address, Decoder)))
+ return MCDisassembler::Fail;
+
+ if (!Check(S, DecodePredicateOperand(Inst, pred, Address, Decoder)))
+ return MCDisassembler::Fail;
+
+ return S;
+}
+
+static DecodeStatus DecodeAddrModeImm12Operand(llvm::MCInst &Inst, unsigned Val,
+ uint64_t Address, const void *Decoder) {
+ DecodeStatus S = MCDisassembler::Success;
+
+ unsigned add = fieldFromInstruction32(Val, 12, 1);
+ unsigned imm = fieldFromInstruction32(Val, 0, 12);
+ unsigned Rn = fieldFromInstruction32(Val, 13, 4);
+
+ if (!Check(S, DecodeGPRRegisterClass(Inst, Rn, Address, Decoder)))
+ return MCDisassembler::Fail;
+
+ if (!add) imm *= -1;
+ if (imm == 0 && !add) imm = INT32_MIN;
+ Inst.addOperand(MCOperand::CreateImm(imm));
+ if (Rn == 15)
+ tryAddingPcLoadReferenceComment(Address, Address + imm + 8, Decoder);
+
+ return S;
+}
+
+static DecodeStatus DecodeAddrMode5Operand(llvm::MCInst &Inst, unsigned Val,
+ uint64_t Address, const void *Decoder) {
+ DecodeStatus S = MCDisassembler::Success;
+
+ unsigned Rn = fieldFromInstruction32(Val, 9, 4);
+ unsigned U = fieldFromInstruction32(Val, 8, 1);
+ unsigned imm = fieldFromInstruction32(Val, 0, 8);
+
+ if (!Check(S, DecodeGPRRegisterClass(Inst, Rn, Address, Decoder)))
+ return MCDisassembler::Fail;
+
+ if (U)
+ Inst.addOperand(MCOperand::CreateImm(ARM_AM::getAM5Opc(ARM_AM::add, imm)));
+ else
+ Inst.addOperand(MCOperand::CreateImm(ARM_AM::getAM5Opc(ARM_AM::sub, imm)));
+
+ return S;
+}
+
+static DecodeStatus DecodeAddrMode7Operand(llvm::MCInst &Inst, unsigned Val,
+ uint64_t Address, const void *Decoder) {
+ return DecodeGPRRegisterClass(Inst, Val, Address, Decoder);
+}
+
+static DecodeStatus
+DecodeBranchImmInstruction(llvm::MCInst &Inst, unsigned Insn,
+ uint64_t Address, const void *Decoder) {
+ DecodeStatus S = MCDisassembler::Success;
+
+ unsigned pred = fieldFromInstruction32(Insn, 28, 4);
+ unsigned imm = fieldFromInstruction32(Insn, 0, 24) << 2;
+
+ if (pred == 0xF) {
+ Inst.setOpcode(ARM::BLXi);
+ imm |= fieldFromInstruction32(Insn, 24, 1) << 1;
+ Inst.addOperand(MCOperand::CreateImm(SignExtend32<26>(imm)));
+ return S;
+ }
+
+ if (!tryAddingSymbolicOperand(Address, Address + SignExtend32<26>(imm) + 8, true,
+ 4, Inst, Decoder))
+ Inst.addOperand(MCOperand::CreateImm(SignExtend32<26>(imm)));
+ if (!Check(S, DecodePredicateOperand(Inst, pred, Address, Decoder)))
+ return MCDisassembler::Fail;
+
+ return S;
+}
+
+
+static DecodeStatus DecodeVCVTImmOperand(llvm::MCInst &Inst, unsigned Val,
+ uint64_t Address, const void *Decoder) {
+ Inst.addOperand(MCOperand::CreateImm(64 - Val));
+ return MCDisassembler::Success;
+}
+
+static DecodeStatus DecodeAddrMode6Operand(llvm::MCInst &Inst, unsigned Val,
+ uint64_t Address, const void *Decoder) {
+ DecodeStatus S = MCDisassembler::Success;
+
+ unsigned Rm = fieldFromInstruction32(Val, 0, 4);
+ unsigned align = fieldFromInstruction32(Val, 4, 2);
+
+ if (!Check(S, DecodeGPRRegisterClass(Inst, Rm, Address, Decoder)))
+ return MCDisassembler::Fail;
+ if (!align)
+ Inst.addOperand(MCOperand::CreateImm(0));
+ else
+ Inst.addOperand(MCOperand::CreateImm(4 << align));
+
+ return S;
+}
+
+static DecodeStatus DecodeVLDInstruction(llvm::MCInst &Inst, unsigned Insn,
+ uint64_t Address, const void *Decoder) {
+ DecodeStatus S = MCDisassembler::Success;
+
+ unsigned Rd = fieldFromInstruction32(Insn, 12, 4);
+ Rd |= fieldFromInstruction32(Insn, 22, 1) << 4;
+ unsigned wb = fieldFromInstruction32(Insn, 16, 4);
+ unsigned Rn = fieldFromInstruction32(Insn, 16, 4);
+ Rn |= fieldFromInstruction32(Insn, 4, 2) << 4;
+ unsigned Rm = fieldFromInstruction32(Insn, 0, 4);
+
+ // First output register
+ if (!Check(S, DecodeDPRRegisterClass(Inst, Rd, Address, Decoder)))
+ return MCDisassembler::Fail;
+
+ // Second output register
+ switch (Inst.getOpcode()) {
+ case ARM::VLD1q8:
+ case ARM::VLD1q16:
+ case ARM::VLD1q32:
+ case ARM::VLD1q64:
+ case ARM::VLD1q8_UPD:
+ case ARM::VLD1q16_UPD:
+ case ARM::VLD1q32_UPD:
+ case ARM::VLD1q64_UPD:
+ case ARM::VLD1d8T:
+ case ARM::VLD1d16T:
+ case ARM::VLD1d32T:
+ case ARM::VLD1d64T:
+ case ARM::VLD1d8T_UPD:
+ case ARM::VLD1d16T_UPD:
+ case ARM::VLD1d32T_UPD:
+ case ARM::VLD1d64T_UPD:
+ case ARM::VLD1d8Q:
+ case ARM::VLD1d16Q:
+ case ARM::VLD1d32Q:
+ case ARM::VLD1d64Q:
+ case ARM::VLD1d8Q_UPD:
+ case ARM::VLD1d16Q_UPD:
+ case ARM::VLD1d32Q_UPD:
+ case ARM::VLD1d64Q_UPD:
+ case ARM::VLD2d8:
+ case ARM::VLD2d16:
+ case ARM::VLD2d32:
+ case ARM::VLD2d8_UPD:
+ case ARM::VLD2d16_UPD:
+ case ARM::VLD2d32_UPD:
+ case ARM::VLD2q8:
+ case ARM::VLD2q16:
+ case ARM::VLD2q32:
+ case ARM::VLD2q8_UPD:
+ case ARM::VLD2q16_UPD:
+ case ARM::VLD2q32_UPD:
+ case ARM::VLD3d8:
+ case ARM::VLD3d16:
+ case ARM::VLD3d32:
+ case ARM::VLD3d8_UPD:
+ case ARM::VLD3d16_UPD:
+ case ARM::VLD3d32_UPD:
+ case ARM::VLD4d8:
+ case ARM::VLD4d16:
+ case ARM::VLD4d32:
+ case ARM::VLD4d8_UPD:
+ case ARM::VLD4d16_UPD:
+ case ARM::VLD4d32_UPD:
+ if (!Check(S, DecodeDPRRegisterClass(Inst, (Rd+1)%32, Address, Decoder)))
+ return MCDisassembler::Fail;
break;
+ case ARM::VLD2b8:
+ case ARM::VLD2b16:
+ case ARM::VLD2b32:
+ case ARM::VLD2b8_UPD:
+ case ARM::VLD2b16_UPD:
+ case ARM::VLD2b32_UPD:
+ case ARM::VLD3q8:
+ case ARM::VLD3q16:
+ case ARM::VLD3q32:
+ case ARM::VLD3q8_UPD:
+ case ARM::VLD3q16_UPD:
+ case ARM::VLD3q32_UPD:
+ case ARM::VLD4q8:
+ case ARM::VLD4q16:
+ case ARM::VLD4q32:
+ case ARM::VLD4q8_UPD:
+ case ARM::VLD4q16_UPD:
+ case ARM::VLD4q32_UPD:
+ if (!Check(S, DecodeDPRRegisterClass(Inst, (Rd+2)%32, Address, Decoder)))
+ return MCDisassembler::Fail;
default:
- break; // fallthrough
+ break;
+ }
+
+ // Third output register
+ switch(Inst.getOpcode()) {
+ case ARM::VLD1d8T:
+ case ARM::VLD1d16T:
+ case ARM::VLD1d32T:
+ case ARM::VLD1d64T:
+ case ARM::VLD1d8T_UPD:
+ case ARM::VLD1d16T_UPD:
+ case ARM::VLD1d32T_UPD:
+ case ARM::VLD1d64T_UPD:
+ case ARM::VLD1d8Q:
+ case ARM::VLD1d16Q:
+ case ARM::VLD1d32Q:
+ case ARM::VLD1d64Q:
+ case ARM::VLD1d8Q_UPD:
+ case ARM::VLD1d16Q_UPD:
+ case ARM::VLD1d32Q_UPD:
+ case ARM::VLD1d64Q_UPD:
+ case ARM::VLD2q8:
+ case ARM::VLD2q16:
+ case ARM::VLD2q32:
+ case ARM::VLD2q8_UPD:
+ case ARM::VLD2q16_UPD:
+ case ARM::VLD2q32_UPD:
+ case ARM::VLD3d8:
+ case ARM::VLD3d16:
+ case ARM::VLD3d32:
+ case ARM::VLD3d8_UPD:
+ case ARM::VLD3d16_UPD:
+ case ARM::VLD3d32_UPD:
+ case ARM::VLD4d8:
+ case ARM::VLD4d16:
+ case ARM::VLD4d32:
+ case ARM::VLD4d8_UPD:
+ case ARM::VLD4d16_UPD:
+ case ARM::VLD4d32_UPD:
+ if (!Check(S, DecodeDPRRegisterClass(Inst, (Rd+2)%32, Address, Decoder)))
+ return MCDisassembler::Fail;
+ break;
+ case ARM::VLD3q8:
+ case ARM::VLD3q16:
+ case ARM::VLD3q32:
+ case ARM::VLD3q8_UPD:
+ case ARM::VLD3q16_UPD:
+ case ARM::VLD3q32_UPD:
+ case ARM::VLD4q8:
+ case ARM::VLD4q16:
+ case ARM::VLD4q32:
+ case ARM::VLD4q8_UPD:
+ case ARM::VLD4q16_UPD:
+ case ARM::VLD4q32_UPD:
+ if (!Check(S, DecodeDPRRegisterClass(Inst, (Rd+4)%32, Address, Decoder)))
+ return MCDisassembler::Fail;
+ break;
+ default:
+ break;
+ }
+
+ // Fourth output register
+ switch (Inst.getOpcode()) {
+ case ARM::VLD1d8Q:
+ case ARM::VLD1d16Q:
+ case ARM::VLD1d32Q:
+ case ARM::VLD1d64Q:
+ case ARM::VLD1d8Q_UPD:
+ case ARM::VLD1d16Q_UPD:
+ case ARM::VLD1d32Q_UPD:
+ case ARM::VLD1d64Q_UPD:
+ case ARM::VLD2q8:
+ case ARM::VLD2q16:
+ case ARM::VLD2q32:
+ case ARM::VLD2q8_UPD:
+ case ARM::VLD2q16_UPD:
+ case ARM::VLD2q32_UPD:
+ case ARM::VLD4d8:
+ case ARM::VLD4d16:
+ case ARM::VLD4d32:
+ case ARM::VLD4d8_UPD:
+ case ARM::VLD4d16_UPD:
+ case ARM::VLD4d32_UPD:
+ if (!Check(S, DecodeDPRRegisterClass(Inst, (Rd+3)%32, Address, Decoder)))
+ return MCDisassembler::Fail;
+ break;
+ case ARM::VLD4q8:
+ case ARM::VLD4q16:
+ case ARM::VLD4q32:
+ case ARM::VLD4q8_UPD:
+ case ARM::VLD4q16_UPD:
+ case ARM::VLD4q32_UPD:
+ if (!Check(S, DecodeDPRRegisterClass(Inst, (Rd+6)%32, Address, Decoder)))
+ return MCDisassembler::Fail;
+ break;
+ default:
+ break;
+ }
+
+ // Writeback operand
+ switch (Inst.getOpcode()) {
+ case ARM::VLD1d8_UPD:
+ case ARM::VLD1d16_UPD:
+ case ARM::VLD1d32_UPD:
+ case ARM::VLD1d64_UPD:
+ case ARM::VLD1q8_UPD:
+ case ARM::VLD1q16_UPD:
+ case ARM::VLD1q32_UPD:
+ case ARM::VLD1q64_UPD:
+ case ARM::VLD1d8T_UPD:
+ case ARM::VLD1d16T_UPD:
+ case ARM::VLD1d32T_UPD:
+ case ARM::VLD1d64T_UPD:
+ case ARM::VLD1d8Q_UPD:
+ case ARM::VLD1d16Q_UPD:
+ case ARM::VLD1d32Q_UPD:
+ case ARM::VLD1d64Q_UPD:
+ case ARM::VLD2d8_UPD:
+ case ARM::VLD2d16_UPD:
+ case ARM::VLD2d32_UPD:
+ case ARM::VLD2q8_UPD:
+ case ARM::VLD2q16_UPD:
+ case ARM::VLD2q32_UPD:
+ case ARM::VLD2b8_UPD:
+ case ARM::VLD2b16_UPD:
+ case ARM::VLD2b32_UPD:
+ case ARM::VLD3d8_UPD:
+ case ARM::VLD3d16_UPD:
+ case ARM::VLD3d32_UPD:
+ case ARM::VLD3q8_UPD:
+ case ARM::VLD3q16_UPD:
+ case ARM::VLD3q32_UPD:
+ case ARM::VLD4d8_UPD:
+ case ARM::VLD4d16_UPD:
+ case ARM::VLD4d32_UPD:
+ case ARM::VLD4q8_UPD:
+ case ARM::VLD4q16_UPD:
+ case ARM::VLD4q32_UPD:
+ if (!Check(S, DecodeGPRRegisterClass(Inst, wb, Address, Decoder)))
+ return MCDisassembler::Fail;
+ break;
+ default:
+ break;
+ }
+
+ // AddrMode6 Base (register+alignment)
+ if (!Check(S, DecodeAddrMode6Operand(Inst, Rn, Address, Decoder)))
+ return MCDisassembler::Fail;
+
+ // AddrMode6 Offset (register)
+ if (Rm == 0xD)
+ Inst.addOperand(MCOperand::CreateReg(0));
+ else if (Rm != 0xF) {
+ if (!Check(S, DecodeGPRRegisterClass(Inst, Rm, Address, Decoder)))
+ return MCDisassembler::Fail;
+ }
+
+ return S;
+}
+
+static DecodeStatus DecodeVSTInstruction(llvm::MCInst &Inst, unsigned Insn,
+ uint64_t Address, const void *Decoder) {
+ DecodeStatus S = MCDisassembler::Success;
+
+ unsigned Rd = fieldFromInstruction32(Insn, 12, 4);
+ Rd |= fieldFromInstruction32(Insn, 22, 1) << 4;
+ unsigned wb = fieldFromInstruction32(Insn, 16, 4);
+ unsigned Rn = fieldFromInstruction32(Insn, 16, 4);
+ Rn |= fieldFromInstruction32(Insn, 4, 2) << 4;
+ unsigned Rm = fieldFromInstruction32(Insn, 0, 4);
+
+ // Writeback Operand
+ switch (Inst.getOpcode()) {
+ case ARM::VST1d8_UPD:
+ case ARM::VST1d16_UPD:
+ case ARM::VST1d32_UPD:
+ case ARM::VST1d64_UPD:
+ case ARM::VST1q8_UPD:
+ case ARM::VST1q16_UPD:
+ case ARM::VST1q32_UPD:
+ case ARM::VST1q64_UPD:
+ case ARM::VST1d8T_UPD:
+ case ARM::VST1d16T_UPD:
+ case ARM::VST1d32T_UPD:
+ case ARM::VST1d64T_UPD:
+ case ARM::VST1d8Q_UPD:
+ case ARM::VST1d16Q_UPD:
+ case ARM::VST1d32Q_UPD:
+ case ARM::VST1d64Q_UPD:
+ case ARM::VST2d8_UPD:
+ case ARM::VST2d16_UPD:
+ case ARM::VST2d32_UPD:
+ case ARM::VST2q8_UPD:
+ case ARM::VST2q16_UPD:
+ case ARM::VST2q32_UPD:
+ case ARM::VST2b8_UPD:
+ case ARM::VST2b16_UPD:
+ case ARM::VST2b32_UPD:
+ case ARM::VST3d8_UPD:
+ case ARM::VST3d16_UPD:
+ case ARM::VST3d32_UPD:
+ case ARM::VST3q8_UPD:
+ case ARM::VST3q16_UPD:
+ case ARM::VST3q32_UPD:
+ case ARM::VST4d8_UPD:
+ case ARM::VST4d16_UPD:
+ case ARM::VST4d32_UPD:
+ case ARM::VST4q8_UPD:
+ case ARM::VST4q16_UPD:
+ case ARM::VST4q32_UPD:
+ if (!Check(S, DecodeGPRRegisterClass(Inst, wb, Address, Decoder)))
+ return MCDisassembler::Fail;
+ break;
+ default:
+ break;
+ }
+
+ // AddrMode6 Base (register+alignment)
+ if (!Check(S, DecodeAddrMode6Operand(Inst, Rn, Address, Decoder)))
+ return MCDisassembler::Fail;
+
+ // AddrMode6 Offset (register)
+ if (Rm == 0xD)
+ Inst.addOperand(MCOperand::CreateReg(0));
+ else if (Rm != 0xF) {
+ if (!Check(S, DecodeGPRRegisterClass(Inst, Rm, Address, Decoder)))
+ return MCDisassembler::Fail;
+ }
+
+ // First input register
+ if (!Check(S, DecodeDPRRegisterClass(Inst, Rd, Address, Decoder)))
+ return MCDisassembler::Fail;
+
+ // Second input register
+ switch (Inst.getOpcode()) {
+ case ARM::VST1q8:
+ case ARM::VST1q16:
+ case ARM::VST1q32:
+ case ARM::VST1q64:
+ case ARM::VST1q8_UPD:
+ case ARM::VST1q16_UPD:
+ case ARM::VST1q32_UPD:
+ case ARM::VST1q64_UPD:
+ case ARM::VST1d8T:
+ case ARM::VST1d16T:
+ case ARM::VST1d32T:
+ case ARM::VST1d64T:
+ case ARM::VST1d8T_UPD:
+ case ARM::VST1d16T_UPD:
+ case ARM::VST1d32T_UPD:
+ case ARM::VST1d64T_UPD:
+ case ARM::VST1d8Q:
+ case ARM::VST1d16Q:
+ case ARM::VST1d32Q:
+ case ARM::VST1d64Q:
+ case ARM::VST1d8Q_UPD:
+ case ARM::VST1d16Q_UPD:
+ case ARM::VST1d32Q_UPD:
+ case ARM::VST1d64Q_UPD:
+ case ARM::VST2d8:
+ case ARM::VST2d16:
+ case ARM::VST2d32:
+ case ARM::VST2d8_UPD:
+ case ARM::VST2d16_UPD:
+ case ARM::VST2d32_UPD:
+ case ARM::VST2q8:
+ case ARM::VST2q16:
+ case ARM::VST2q32:
+ case ARM::VST2q8_UPD:
+ case ARM::VST2q16_UPD:
+ case ARM::VST2q32_UPD:
+ case ARM::VST3d8:
+ case ARM::VST3d16:
+ case ARM::VST3d32:
+ case ARM::VST3d8_UPD:
+ case ARM::VST3d16_UPD:
+ case ARM::VST3d32_UPD:
+ case ARM::VST4d8:
+ case ARM::VST4d16:
+ case ARM::VST4d32:
+ case ARM::VST4d8_UPD:
+ case ARM::VST4d16_UPD:
+ case ARM::VST4d32_UPD:
+ if (!Check(S, DecodeDPRRegisterClass(Inst, (Rd+1)%32, Address, Decoder)))
+ return MCDisassembler::Fail;
+ break;
+ case ARM::VST2b8:
+ case ARM::VST2b16:
+ case ARM::VST2b32:
+ case ARM::VST2b8_UPD:
+ case ARM::VST2b16_UPD:
+ case ARM::VST2b32_UPD:
+ case ARM::VST3q8:
+ case ARM::VST3q16:
+ case ARM::VST3q32:
+ case ARM::VST3q8_UPD:
+ case ARM::VST3q16_UPD:
+ case ARM::VST3q32_UPD:
+ case ARM::VST4q8:
+ case ARM::VST4q16:
+ case ARM::VST4q32:
+ case ARM::VST4q8_UPD:
+ case ARM::VST4q16_UPD:
+ case ARM::VST4q32_UPD:
+ if (!Check(S, DecodeDPRRegisterClass(Inst, (Rd+2)%32, Address, Decoder)))
+ return MCDisassembler::Fail;
+ break;
+ default:
+ break;
+ }
+
+ // Third input register
+ switch (Inst.getOpcode()) {
+ case ARM::VST1d8T:
+ case ARM::VST1d16T:
+ case ARM::VST1d32T:
+ case ARM::VST1d64T:
+ case ARM::VST1d8T_UPD:
+ case ARM::VST1d16T_UPD:
+ case ARM::VST1d32T_UPD:
+ case ARM::VST1d64T_UPD:
+ case ARM::VST1d8Q:
+ case ARM::VST1d16Q:
+ case ARM::VST1d32Q:
+ case ARM::VST1d64Q:
+ case ARM::VST1d8Q_UPD:
+ case ARM::VST1d16Q_UPD:
+ case ARM::VST1d32Q_UPD:
+ case ARM::VST1d64Q_UPD:
+ case ARM::VST2q8:
+ case ARM::VST2q16:
+ case ARM::VST2q32:
+ case ARM::VST2q8_UPD:
+ case ARM::VST2q16_UPD:
+ case ARM::VST2q32_UPD:
+ case ARM::VST3d8:
+ case ARM::VST3d16:
+ case ARM::VST3d32:
+ case ARM::VST3d8_UPD:
+ case ARM::VST3d16_UPD:
+ case ARM::VST3d32_UPD:
+ case ARM::VST4d8:
+ case ARM::VST4d16:
+ case ARM::VST4d32:
+ case ARM::VST4d8_UPD:
+ case ARM::VST4d16_UPD:
+ case ARM::VST4d32_UPD:
+ if (!Check(S, DecodeDPRRegisterClass(Inst, (Rd+2)%32, Address, Decoder)))
+ return MCDisassembler::Fail;
+ break;
+ case ARM::VST3q8:
+ case ARM::VST3q16:
+ case ARM::VST3q32:
+ case ARM::VST3q8_UPD:
+ case ARM::VST3q16_UPD:
+ case ARM::VST3q32_UPD:
+ case ARM::VST4q8:
+ case ARM::VST4q16:
+ case ARM::VST4q32:
+ case ARM::VST4q8_UPD:
+ case ARM::VST4q16_UPD:
+ case ARM::VST4q32_UPD:
+ if (!Check(S, DecodeDPRRegisterClass(Inst, (Rd+4)%32, Address, Decoder)))
+ return MCDisassembler::Fail;
+ break;
+ default:
+ break;
+ }
+
+ // Fourth input register
+ switch (Inst.getOpcode()) {
+ case ARM::VST1d8Q:
+ case ARM::VST1d16Q:
+ case ARM::VST1d32Q:
+ case ARM::VST1d64Q:
+ case ARM::VST1d8Q_UPD:
+ case ARM::VST1d16Q_UPD:
+ case ARM::VST1d32Q_UPD:
+ case ARM::VST1d64Q_UPD:
+ case ARM::VST2q8:
+ case ARM::VST2q16:
+ case ARM::VST2q32:
+ case ARM::VST2q8_UPD:
+ case ARM::VST2q16_UPD:
+ case ARM::VST2q32_UPD:
+ case ARM::VST4d8:
+ case ARM::VST4d16:
+ case ARM::VST4d32:
+ case ARM::VST4d8_UPD:
+ case ARM::VST4d16_UPD:
+ case ARM::VST4d32_UPD:
+ if (!Check(S, DecodeDPRRegisterClass(Inst, (Rd+3)%32, Address, Decoder)))
+ return MCDisassembler::Fail;
+ break;
+ case ARM::VST4q8:
+ case ARM::VST4q16:
+ case ARM::VST4q32:
+ case ARM::VST4q8_UPD:
+ case ARM::VST4q16_UPD:
+ case ARM::VST4q32_UPD:
+ if (!Check(S, DecodeDPRRegisterClass(Inst, (Rd+6)%32, Address, Decoder)))
+ return MCDisassembler::Fail;
+ break;
+ default:
+ break;
+ }
+
+ return S;
+}
+
+static DecodeStatus DecodeVLD1DupInstruction(llvm::MCInst &Inst, unsigned Insn,
+ uint64_t Address, const void *Decoder) {
+ DecodeStatus S = MCDisassembler::Success;
+
+ unsigned Rd = fieldFromInstruction32(Insn, 12, 4);
+ Rd |= fieldFromInstruction32(Insn, 22, 1) << 4;
+ unsigned Rn = fieldFromInstruction32(Insn, 16, 4);
+ unsigned Rm = fieldFromInstruction32(Insn, 0, 4);
+ unsigned align = fieldFromInstruction32(Insn, 4, 1);
+ unsigned size = fieldFromInstruction32(Insn, 6, 2);
+ unsigned regs = fieldFromInstruction32(Insn, 5, 1) + 1;
+
+ align *= (1 << size);
+
+ if (!Check(S, DecodeDPRRegisterClass(Inst, Rd, Address, Decoder)))
+ return MCDisassembler::Fail;
+ if (regs == 2) {
+ if (!Check(S, DecodeDPRRegisterClass(Inst, (Rd+1)%32, Address, Decoder)))
+ return MCDisassembler::Fail;
+ }
+ if (Rm != 0xF) {
+ if (!Check(S, DecodeGPRRegisterClass(Inst, Rn, Address, Decoder)))
+ return MCDisassembler::Fail;
+ }
+
+ if (!Check(S, DecodeGPRRegisterClass(Inst, Rn, Address, Decoder)))
+ return MCDisassembler::Fail;
+ Inst.addOperand(MCOperand::CreateImm(align));
+
+ if (Rm == 0xD)
+ Inst.addOperand(MCOperand::CreateReg(0));
+ else if (Rm != 0xF) {
+ if (!Check(S, DecodeGPRRegisterClass(Inst, Rm, Address, Decoder)))
+ return MCDisassembler::Fail;
+ }
+
+ return S;
+}
+
+static DecodeStatus DecodeVLD2DupInstruction(llvm::MCInst &Inst, unsigned Insn,
+ uint64_t Address, const void *Decoder) {
+ DecodeStatus S = MCDisassembler::Success;
+
+ unsigned Rd = fieldFromInstruction32(Insn, 12, 4);
+ Rd |= fieldFromInstruction32(Insn, 22, 1) << 4;
+ unsigned Rn = fieldFromInstruction32(Insn, 16, 4);
+ unsigned Rm = fieldFromInstruction32(Insn, 0, 4);
+ unsigned align = fieldFromInstruction32(Insn, 4, 1);
+ unsigned size = 1 << fieldFromInstruction32(Insn, 6, 2);
+ unsigned inc = fieldFromInstruction32(Insn, 5, 1) + 1;
+ align *= 2*size;
+
+ if (!Check(S, DecodeDPRRegisterClass(Inst, Rd, Address, Decoder)))
+ return MCDisassembler::Fail;
+ if (!Check(S, DecodeDPRRegisterClass(Inst, (Rd+inc)%32, Address, Decoder)))
+ return MCDisassembler::Fail;
+ if (Rm != 0xF) {
+ if (!Check(S, DecodeGPRRegisterClass(Inst, Rn, Address, Decoder)))
+ return MCDisassembler::Fail;
+ }
+
+ if (!Check(S, DecodeGPRRegisterClass(Inst, Rn, Address, Decoder)))
+ return MCDisassembler::Fail;
+ Inst.addOperand(MCOperand::CreateImm(align));
+
+ if (Rm == 0xD)
+ Inst.addOperand(MCOperand::CreateReg(0));
+ else if (Rm != 0xF) {
+ if (!Check(S, DecodeGPRRegisterClass(Inst, Rm, Address, Decoder)))
+ return MCDisassembler::Fail;
+ }
+
+ return S;
+}
+
+static DecodeStatus DecodeVLD3DupInstruction(llvm::MCInst &Inst, unsigned Insn,
+ uint64_t Address, const void *Decoder) {
+ DecodeStatus S = MCDisassembler::Success;
+
+ unsigned Rd = fieldFromInstruction32(Insn, 12, 4);
+ Rd |= fieldFromInstruction32(Insn, 22, 1) << 4;
+ unsigned Rn = fieldFromInstruction32(Insn, 16, 4);
+ unsigned Rm = fieldFromInstruction32(Insn, 0, 4);
+ unsigned inc = fieldFromInstruction32(Insn, 5, 1) + 1;
+
+ if (!Check(S, DecodeDPRRegisterClass(Inst, Rd, Address, Decoder)))
+ return MCDisassembler::Fail;
+ if (!Check(S, DecodeDPRRegisterClass(Inst, (Rd+inc)%32, Address, Decoder)))
+ return MCDisassembler::Fail;
+ if (!Check(S, DecodeDPRRegisterClass(Inst, (Rd+2*inc)%32, Address, Decoder)))
+ return MCDisassembler::Fail;
+ if (Rm != 0xF) {
+ if (!Check(S, DecodeGPRRegisterClass(Inst, Rn, Address, Decoder)))
+ return MCDisassembler::Fail;
+ }
+
+ if (!Check(S, DecodeGPRRegisterClass(Inst, Rn, Address, Decoder)))
+ return MCDisassembler::Fail;
+ Inst.addOperand(MCOperand::CreateImm(0));
+
+ if (Rm == 0xD)
+ Inst.addOperand(MCOperand::CreateReg(0));
+ else if (Rm != 0xF) {
+ if (!Check(S, DecodeGPRRegisterClass(Inst, Rm, Address, Decoder)))
+ return MCDisassembler::Fail;
+ }
+
+ return S;
+}
+
+static DecodeStatus DecodeVLD4DupInstruction(llvm::MCInst &Inst, unsigned Insn,
+ uint64_t Address, const void *Decoder) {
+ DecodeStatus S = MCDisassembler::Success;
+
+ unsigned Rd = fieldFromInstruction32(Insn, 12, 4);
+ Rd |= fieldFromInstruction32(Insn, 22, 1) << 4;
+ unsigned Rn = fieldFromInstruction32(Insn, 16, 4);
+ unsigned Rm = fieldFromInstruction32(Insn, 0, 4);
+ unsigned size = fieldFromInstruction32(Insn, 6, 2);
+ unsigned inc = fieldFromInstruction32(Insn, 5, 1) + 1;
+ unsigned align = fieldFromInstruction32(Insn, 4, 1);
+
+ if (size == 0x3) {
+ size = 4;
+ align = 16;
+ } else {
+ if (size == 2) {
+ size = 1 << size;
+ align *= 8;
+ } else {
+ size = 1 << size;
+ align *= 4*size;
}
}
-AutoGenedDecoder:
- // Calling the auto-generated decoder function.
- return decodeInstruction(insn);
+ if (!Check(S, DecodeDPRRegisterClass(Inst, Rd, Address, Decoder)))
+ return MCDisassembler::Fail;
+ if (!Check(S, DecodeDPRRegisterClass(Inst, (Rd+inc)%32, Address, Decoder)))
+ return MCDisassembler::Fail;
+ if (!Check(S, DecodeDPRRegisterClass(Inst, (Rd+2*inc)%32, Address, Decoder)))
+ return MCDisassembler::Fail;
+ if (!Check(S, DecodeDPRRegisterClass(Inst, (Rd+3*inc)%32, Address, Decoder)))
+ return MCDisassembler::Fail;
+ if (Rm != 0xF) {
+ if (!Check(S, DecodeGPRRegisterClass(Inst, Rn, Address, Decoder)))
+ return MCDisassembler::Fail;
+ }
+
+ if (!Check(S, DecodeGPRRegisterClass(Inst, Rn, Address, Decoder)))
+ return MCDisassembler::Fail;
+ Inst.addOperand(MCOperand::CreateImm(align));
+
+ if (Rm == 0xD)
+ Inst.addOperand(MCOperand::CreateReg(0));
+ else if (Rm != 0xF) {
+ if (!Check(S, DecodeGPRRegisterClass(Inst, Rm, Address, Decoder)))
+ return MCDisassembler::Fail;
+ }
+
+ return S;
}
-// Helper function for special case handling of LDR (literal) and friends.
-// See, for example, A6.3.7 Load word: Table A6-18 Load word.
-// See A8.6.57 T3, T4 & A8.6.60 T2 and friends for why we morphed the opcode
-// before returning it.
-static unsigned T2Morph2LoadLiteral(unsigned Opcode) {
- switch (Opcode) {
- default:
- return Opcode; // Return unmorphed opcode.
+static DecodeStatus
+DecodeNEONModImmInstruction(llvm::MCInst &Inst, unsigned Insn,
+ uint64_t Address, const void *Decoder) {
+ DecodeStatus S = MCDisassembler::Success;
+
+ unsigned Rd = fieldFromInstruction32(Insn, 12, 4);
+ Rd |= fieldFromInstruction32(Insn, 22, 1) << 4;
+ unsigned imm = fieldFromInstruction32(Insn, 0, 4);
+ imm |= fieldFromInstruction32(Insn, 16, 3) << 4;
+ imm |= fieldFromInstruction32(Insn, 24, 1) << 7;
+ imm |= fieldFromInstruction32(Insn, 8, 4) << 8;
+ imm |= fieldFromInstruction32(Insn, 5, 1) << 12;
+ unsigned Q = fieldFromInstruction32(Insn, 6, 1);
+
+ if (Q) {
+ if (!Check(S, DecodeQPRRegisterClass(Inst, Rd, Address, Decoder)))
+ return MCDisassembler::Fail;
+ } else {
+ if (!Check(S, DecodeDPRRegisterClass(Inst, Rd, Address, Decoder)))
+ return MCDisassembler::Fail;
+ }
+
+ Inst.addOperand(MCOperand::CreateImm(imm));
+
+ switch (Inst.getOpcode()) {
+ case ARM::VORRiv4i16:
+ case ARM::VORRiv2i32:
+ case ARM::VBICiv4i16:
+ case ARM::VBICiv2i32:
+ if (!Check(S, DecodeDPRRegisterClass(Inst, Rd, Address, Decoder)))
+ return MCDisassembler::Fail;
+ break;
+ case ARM::VORRiv8i16:
+ case ARM::VORRiv4i32:
+ case ARM::VBICiv8i16:
+ case ARM::VBICiv4i32:
+ if (!Check(S, DecodeQPRRegisterClass(Inst, Rd, Address, Decoder)))
+ return MCDisassembler::Fail;
+ break;
+ default:
+ break;
+ }
+
+ return S;
+}
+
+static DecodeStatus DecodeVSHLMaxInstruction(llvm::MCInst &Inst, unsigned Insn,
+ uint64_t Address, const void *Decoder) {
+ DecodeStatus S = MCDisassembler::Success;
+
+ unsigned Rd = fieldFromInstruction32(Insn, 12, 4);
+ Rd |= fieldFromInstruction32(Insn, 22, 1) << 4;
+ unsigned Rm = fieldFromInstruction32(Insn, 0, 4);
+ Rm |= fieldFromInstruction32(Insn, 5, 1) << 4;
+ unsigned size = fieldFromInstruction32(Insn, 18, 2);
+
+ if (!Check(S, DecodeQPRRegisterClass(Inst, Rd, Address, Decoder)))
+ return MCDisassembler::Fail;
+ if (!Check(S, DecodeDPRRegisterClass(Inst, Rm, Address, Decoder)))
+ return MCDisassembler::Fail;
+ Inst.addOperand(MCOperand::CreateImm(8 << size));
+
+ return S;
+}
+
+static DecodeStatus DecodeShiftRight8Imm(llvm::MCInst &Inst, unsigned Val,
+ uint64_t Address, const void *Decoder) {
+ Inst.addOperand(MCOperand::CreateImm(8 - Val));
+ return MCDisassembler::Success;
+}
- case ARM::t2LDR_POST: case ARM::t2LDR_PRE:
- case ARM::t2LDRi12: case ARM::t2LDRi8:
- case ARM::t2LDRs: case ARM::t2LDRT:
- return ARM::t2LDRpci;
+static DecodeStatus DecodeShiftRight16Imm(llvm::MCInst &Inst, unsigned Val,
+ uint64_t Address, const void *Decoder) {
+ Inst.addOperand(MCOperand::CreateImm(16 - Val));
+ return MCDisassembler::Success;
+}
- case ARM::t2LDRB_POST: case ARM::t2LDRB_PRE:
- case ARM::t2LDRBi12: case ARM::t2LDRBi8:
- case ARM::t2LDRBs: case ARM::t2LDRBT:
- return ARM::t2LDRBpci;
+static DecodeStatus DecodeShiftRight32Imm(llvm::MCInst &Inst, unsigned Val,
+ uint64_t Address, const void *Decoder) {
+ Inst.addOperand(MCOperand::CreateImm(32 - Val));
+ return MCDisassembler::Success;
+}
- case ARM::t2LDRH_POST: case ARM::t2LDRH_PRE:
- case ARM::t2LDRHi12: case ARM::t2LDRHi8:
- case ARM::t2LDRHs: case ARM::t2LDRHT:
- return ARM::t2LDRHpci;
+static DecodeStatus DecodeShiftRight64Imm(llvm::MCInst &Inst, unsigned Val,
+ uint64_t Address, const void *Decoder) {
+ Inst.addOperand(MCOperand::CreateImm(64 - Val));
+ return MCDisassembler::Success;
+}
- case ARM::t2LDRSB_POST: case ARM::t2LDRSB_PRE:
- case ARM::t2LDRSBi12: case ARM::t2LDRSBi8:
- case ARM::t2LDRSBs: case ARM::t2LDRSBT:
- return ARM::t2LDRSBpci;
+static DecodeStatus DecodeTBLInstruction(llvm::MCInst &Inst, unsigned Insn,
+ uint64_t Address, const void *Decoder) {
+ DecodeStatus S = MCDisassembler::Success;
+
+ unsigned Rd = fieldFromInstruction32(Insn, 12, 4);
+ Rd |= fieldFromInstruction32(Insn, 22, 1) << 4;
+ unsigned Rn = fieldFromInstruction32(Insn, 16, 4);
+ Rn |= fieldFromInstruction32(Insn, 7, 1) << 4;
+ unsigned Rm = fieldFromInstruction32(Insn, 0, 4);
+ Rm |= fieldFromInstruction32(Insn, 5, 1) << 4;
+ unsigned op = fieldFromInstruction32(Insn, 6, 1);
+ unsigned length = fieldFromInstruction32(Insn, 8, 2) + 1;
+
+ if (!Check(S, DecodeDPRRegisterClass(Inst, Rd, Address, Decoder)))
+ return MCDisassembler::Fail;
+ if (op) {
+ if (!Check(S, DecodeDPRRegisterClass(Inst, Rd, Address, Decoder)))
+ return MCDisassembler::Fail; // Writeback
+ }
- case ARM::t2LDRSH_POST: case ARM::t2LDRSH_PRE:
- case ARM::t2LDRSHi12: case ARM::t2LDRSHi8:
- case ARM::t2LDRSHs: case ARM::t2LDRSHT:
- return ARM::t2LDRSHpci;
+ for (unsigned i = 0; i < length; ++i) {
+ if (!Check(S, DecodeDPRRegisterClass(Inst, (Rn+i)%32, Address, Decoder)))
+ return MCDisassembler::Fail;
}
+
+ if (!Check(S, DecodeDPRRegisterClass(Inst, Rm, Address, Decoder)))
+ return MCDisassembler::Fail;
+
+ return S;
}
-// Helper function for special case handling of PLD (literal) and friends.
-// See A8.6.117 T1 & T2 and friends for why we morphed the opcode
-// before returning it.
-static unsigned T2Morph2PLDLiteral(unsigned Opcode) {
- switch (Opcode) {
- default:
- return Opcode; // Return unmorphed opcode.
-
- case ARM::t2PLDi8: case ARM::t2PLDs:
- case ARM::t2PLDWi12: case ARM::t2PLDWi8:
- case ARM::t2PLDWs:
- return ARM::t2PLDi12;
-
- case ARM::t2PLIi8: case ARM::t2PLIs:
- return ARM::t2PLIi12;
- }
-}
-
-/// decodeThumbSideEffect is a decorator function which can potentially twiddle
-/// the instruction or morph the returned opcode under Thumb2.
-///
-/// First it checks whether the insn is a NEON or VFP instr; if true, bit
-/// twiddling could be performed on insn to turn it into an ARM NEON/VFP
-/// equivalent instruction and decodeInstruction is called with the transformed
-/// insn.
-///
-/// Next, there is special handling for Load byte/halfword/word instruction by
-/// checking whether Rn=0b1111 and call T2Morph2LoadLiteral() on the decoded
-/// Thumb2 instruction. See comments below for further details.
-///
-/// Finally, one last check is made to see whether the insn is a NEON/VFP and
-/// decodeInstruction(insn) is invoked on the original insn.
-///
-/// Otherwise, decodeThumbInstruction is called with the original insn.
-static unsigned decodeThumbSideEffect(bool IsThumb2, unsigned &insn) {
- if (IsThumb2) {
- uint16_t op1 = slice(insn, 28, 27);
- uint16_t op2 = slice(insn, 26, 20);
-
- // A6.3 32-bit Thumb instruction encoding
- // Table A6-9 32-bit Thumb instruction encoding
-
- // The coprocessor instructions of interest are transformed to their ARM
- // equivalents.
-
- // --------- Transform Begin Marker ---------
- if ((op1 == 1 || op1 == 3) && slice(op2, 6, 4) == 7) {
- // A7.4 Advanced SIMD data-processing instructions
- // U bit of Thumb corresponds to Inst{24} of ARM.
- uint16_t U = slice(op1, 1, 1);
-
- // Inst{28-24} of ARM = {1,0,0,1,U};
- uint16_t bits28_24 = 9 << 1 | U;
- DEBUG(showBitVector(errs(), insn));
- setSlice(insn, 28, 24, bits28_24);
- return decodeInstruction(insn);
+static DecodeStatus DecodeThumbAddSpecialReg(llvm::MCInst &Inst, uint16_t Insn,
+ uint64_t Address, const void *Decoder) {
+ DecodeStatus S = MCDisassembler::Success;
+
+ unsigned dst = fieldFromInstruction16(Insn, 8, 3);
+ unsigned imm = fieldFromInstruction16(Insn, 0, 8);
+
+ if (!Check(S, DecodetGPRRegisterClass(Inst, dst, Address, Decoder)))
+ return MCDisassembler::Fail;
+
+ switch(Inst.getOpcode()) {
+ default:
+ return MCDisassembler::Fail;
+ case ARM::tADR:
+ break; // tADR does not explicitly represent the PC as an operand.
+ case ARM::tADDrSPi:
+ Inst.addOperand(MCOperand::CreateReg(ARM::SP));
+ break;
+ }
+
+ Inst.addOperand(MCOperand::CreateImm(imm));
+ return S;
+}
+
+static DecodeStatus DecodeThumbBROperand(llvm::MCInst &Inst, unsigned Val,
+ uint64_t Address, const void *Decoder) {
+ Inst.addOperand(MCOperand::CreateImm(SignExtend32<12>(Val << 1)));
+ return MCDisassembler::Success;
+}
+
+static DecodeStatus DecodeT2BROperand(llvm::MCInst &Inst, unsigned Val,
+ uint64_t Address, const void *Decoder) {
+ Inst.addOperand(MCOperand::CreateImm(SignExtend32<21>(Val)));
+ return MCDisassembler::Success;
+}
+
+static DecodeStatus DecodeThumbCmpBROperand(llvm::MCInst &Inst, unsigned Val,
+ uint64_t Address, const void *Decoder) {
+ Inst.addOperand(MCOperand::CreateImm(SignExtend32<7>(Val << 1)));
+ return MCDisassembler::Success;
+}
+
+static DecodeStatus DecodeThumbAddrModeRR(llvm::MCInst &Inst, unsigned Val,
+ uint64_t Address, const void *Decoder) {
+ DecodeStatus S = MCDisassembler::Success;
+
+ unsigned Rn = fieldFromInstruction32(Val, 0, 3);
+ unsigned Rm = fieldFromInstruction32(Val, 3, 3);
+
+ if (!Check(S, DecodetGPRRegisterClass(Inst, Rn, Address, Decoder)))
+ return MCDisassembler::Fail;
+ if (!Check(S, DecodetGPRRegisterClass(Inst, Rm, Address, Decoder)))
+ return MCDisassembler::Fail;
+
+ return S;
+}
+
+static DecodeStatus DecodeThumbAddrModeIS(llvm::MCInst &Inst, unsigned Val,
+ uint64_t Address, const void *Decoder) {
+ DecodeStatus S = MCDisassembler::Success;
+
+ unsigned Rn = fieldFromInstruction32(Val, 0, 3);
+ unsigned imm = fieldFromInstruction32(Val, 3, 5);
+
+ if (!Check(S, DecodetGPRRegisterClass(Inst, Rn, Address, Decoder)))
+ return MCDisassembler::Fail;
+ Inst.addOperand(MCOperand::CreateImm(imm));
+
+ return S;
+}
+
+static DecodeStatus DecodeThumbAddrModePC(llvm::MCInst &Inst, unsigned Val,
+ uint64_t Address, const void *Decoder) {
+ unsigned imm = Val << 2;
+
+ Inst.addOperand(MCOperand::CreateImm(imm));
+ tryAddingPcLoadReferenceComment(Address, (Address & ~2u) + imm + 4, Decoder);
+
+ return MCDisassembler::Success;
+}
+
+static DecodeStatus DecodeThumbAddrModeSP(llvm::MCInst &Inst, unsigned Val,
+ uint64_t Address, const void *Decoder) {
+ Inst.addOperand(MCOperand::CreateReg(ARM::SP));
+ Inst.addOperand(MCOperand::CreateImm(Val));
+
+ return MCDisassembler::Success;
+}
+
+static DecodeStatus DecodeT2AddrModeSOReg(llvm::MCInst &Inst, unsigned Val,
+ uint64_t Address, const void *Decoder) {
+ DecodeStatus S = MCDisassembler::Success;
+
+ unsigned Rn = fieldFromInstruction32(Val, 6, 4);
+ unsigned Rm = fieldFromInstruction32(Val, 2, 4);
+ unsigned imm = fieldFromInstruction32(Val, 0, 2);
+
+ if (!Check(S, DecodeGPRRegisterClass(Inst, Rn, Address, Decoder)))
+ return MCDisassembler::Fail;
+ if (!Check(S, DecoderGPRRegisterClass(Inst, Rm, Address, Decoder)))
+ return MCDisassembler::Fail;
+ Inst.addOperand(MCOperand::CreateImm(imm));
+
+ return S;
+}
+
+static DecodeStatus DecodeT2LoadShift(llvm::MCInst &Inst, unsigned Insn,
+ uint64_t Address, const void *Decoder) {
+ DecodeStatus S = MCDisassembler::Success;
+
+ switch (Inst.getOpcode()) {
+ case ARM::t2PLDs:
+ case ARM::t2PLDWs:
+ case ARM::t2PLIs:
+ break;
+ default: {
+ unsigned Rt = fieldFromInstruction32(Insn, 12, 4);
+ if (!Check(S, DecoderGPRRegisterClass(Inst, Rt, Address, Decoder)))
+ return MCDisassembler::Fail;
}
+ }
- if (op1 == 3 && slice(op2, 6, 4) == 1 && slice(op2, 0, 0) == 0) {
- // A7.7 Advanced SIMD element or structure load/store instructions
- // Inst{27-24} of Thumb = 0b1001
- // Inst{27-24} of ARM = 0b0100
- DEBUG(showBitVector(errs(), insn));
- setSlice(insn, 27, 24, 4);
- return decodeInstruction(insn);
+ unsigned Rn = fieldFromInstruction32(Insn, 16, 4);
+ if (Rn == 0xF) {
+ switch (Inst.getOpcode()) {
+ case ARM::t2LDRBs:
+ Inst.setOpcode(ARM::t2LDRBpci);
+ break;
+ case ARM::t2LDRHs:
+ Inst.setOpcode(ARM::t2LDRHpci);
+ break;
+ case ARM::t2LDRSHs:
+ Inst.setOpcode(ARM::t2LDRSHpci);
+ break;
+ case ARM::t2LDRSBs:
+ Inst.setOpcode(ARM::t2LDRSBpci);
+ break;
+ case ARM::t2PLDs:
+ Inst.setOpcode(ARM::t2PLDi12);
+ Inst.addOperand(MCOperand::CreateReg(ARM::PC));
+ break;
+ default:
+ return MCDisassembler::Fail;
}
- // --------- Transform End Marker ---------
-
- unsigned unmorphed = decodeThumbInstruction(insn);
-
- // See, for example, A6.3.7 Load word: Table A6-18 Load word.
- // See A8.6.57 T3, T4 & A8.6.60 T2 and friends for why we morphed the opcode
- // before returning it to our caller.
- if (op1 == 3 && slice(op2, 6, 5) == 0 && slice(op2, 0, 0) == 1
- && slice(insn, 19, 16) == 15) {
- unsigned morphed = T2Morph2LoadLiteral(unmorphed);
- if (morphed != unmorphed)
- return morphed;
+
+ int imm = fieldFromInstruction32(Insn, 0, 12);
+ if (!fieldFromInstruction32(Insn, 23, 1)) imm *= -1;
+ Inst.addOperand(MCOperand::CreateImm(imm));
+
+ return S;
+ }
+
+ unsigned addrmode = fieldFromInstruction32(Insn, 4, 2);
+ addrmode |= fieldFromInstruction32(Insn, 0, 4) << 2;
+ addrmode |= fieldFromInstruction32(Insn, 16, 4) << 6;
+ if (!Check(S, DecodeT2AddrModeSOReg(Inst, addrmode, Address, Decoder)))
+ return MCDisassembler::Fail;
+
+ return S;
+}
+
+static DecodeStatus DecodeT2Imm8S4(llvm::MCInst &Inst, unsigned Val,
+ uint64_t Address, const void *Decoder) {
+ int imm = Val & 0xFF;
+ if (!(Val & 0x100)) imm *= -1;
+ Inst.addOperand(MCOperand::CreateImm(imm << 2));
+
+ return MCDisassembler::Success;
+}
+
+static DecodeStatus DecodeT2AddrModeImm8s4(llvm::MCInst &Inst, unsigned Val,
+ uint64_t Address, const void *Decoder) {
+ DecodeStatus S = MCDisassembler::Success;
+
+ unsigned Rn = fieldFromInstruction32(Val, 9, 4);
+ unsigned imm = fieldFromInstruction32(Val, 0, 9);
+
+ if (!Check(S, DecodeGPRRegisterClass(Inst, Rn, Address, Decoder)))
+ return MCDisassembler::Fail;
+ if (!Check(S, DecodeT2Imm8S4(Inst, imm, Address, Decoder)))
+ return MCDisassembler::Fail;
+
+ return S;
+}
+
+static DecodeStatus DecodeT2AddrModeImm0_1020s4(llvm::MCInst &Inst,unsigned Val,
+ uint64_t Address, const void *Decoder) {
+ DecodeStatus S = MCDisassembler::Success;
+
+ unsigned Rn = fieldFromInstruction32(Val, 8, 4);
+ unsigned imm = fieldFromInstruction32(Val, 0, 8);
+
+ if (!Check(S, DecodeGPRnopcRegisterClass(Inst, Rn, Address, Decoder)))
+ return MCDisassembler::Fail;
+
+ Inst.addOperand(MCOperand::CreateImm(imm));
+
+ return S;
+}
+
+static DecodeStatus DecodeT2Imm8(llvm::MCInst &Inst, unsigned Val,
+ uint64_t Address, const void *Decoder) {
+ int imm = Val & 0xFF;
+ if (Val == 0)
+ imm = INT32_MIN;
+ else if (!(Val & 0x100))
+ imm *= -1;
+ Inst.addOperand(MCOperand::CreateImm(imm));
+
+ return MCDisassembler::Success;
+}
+
+
+static DecodeStatus DecodeT2AddrModeImm8(llvm::MCInst &Inst, unsigned Val,
+ uint64_t Address, const void *Decoder) {
+ DecodeStatus S = MCDisassembler::Success;
+
+ unsigned Rn = fieldFromInstruction32(Val, 9, 4);
+ unsigned imm = fieldFromInstruction32(Val, 0, 9);
+
+ // Some instructions always use an additive offset.
+ switch (Inst.getOpcode()) {
+ case ARM::t2LDRT:
+ case ARM::t2LDRBT:
+ case ARM::t2LDRHT:
+ case ARM::t2LDRSBT:
+ case ARM::t2LDRSHT:
+ case ARM::t2STRT:
+ case ARM::t2STRBT:
+ case ARM::t2STRHT:
+ imm |= 0x100;
+ break;
+ default:
+ break;
+ }
+
+ if (!Check(S, DecodeGPRRegisterClass(Inst, Rn, Address, Decoder)))
+ return MCDisassembler::Fail;
+ if (!Check(S, DecodeT2Imm8(Inst, imm, Address, Decoder)))
+ return MCDisassembler::Fail;
+
+ return S;
+}
+
+static DecodeStatus DecodeT2LdStPre(llvm::MCInst &Inst, unsigned Insn,
+ uint64_t Address, const void *Decoder) {
+ DecodeStatus S = MCDisassembler::Success;
+
+ unsigned Rt = fieldFromInstruction32(Insn, 12, 4);
+ unsigned Rn = fieldFromInstruction32(Insn, 16, 4);
+ unsigned addr = fieldFromInstruction32(Insn, 0, 8);
+ addr |= fieldFromInstruction32(Insn, 9, 1) << 8;
+ addr |= Rn << 9;
+ unsigned load = fieldFromInstruction32(Insn, 20, 1);
+
+ if (!load) {
+ if (!Check(S, DecodeGPRRegisterClass(Inst, Rn, Address, Decoder)))
+ return MCDisassembler::Fail;
+ }
+
+ if (!Check(S, DecoderGPRRegisterClass(Inst, Rt, Address, Decoder)))
+ return MCDisassembler::Fail;
+
+ if (load) {
+ if (!Check(S, DecodeGPRRegisterClass(Inst, Rn, Address, Decoder)))
+ return MCDisassembler::Fail;
+ }
+
+ if (!Check(S, DecodeT2AddrModeImm8(Inst, addr, Address, Decoder)))
+ return MCDisassembler::Fail;
+
+ return S;
+}
+
+static DecodeStatus DecodeT2AddrModeImm12(llvm::MCInst &Inst, unsigned Val,
+ uint64_t Address, const void *Decoder) {
+ DecodeStatus S = MCDisassembler::Success;
+
+ unsigned Rn = fieldFromInstruction32(Val, 13, 4);
+ unsigned imm = fieldFromInstruction32(Val, 0, 12);
+
+ if (!Check(S, DecodeGPRRegisterClass(Inst, Rn, Address, Decoder)))
+ return MCDisassembler::Fail;
+ Inst.addOperand(MCOperand::CreateImm(imm));
+
+ return S;
+}
+
+
+static DecodeStatus DecodeThumbAddSPImm(llvm::MCInst &Inst, uint16_t Insn,
+ uint64_t Address, const void *Decoder) {
+ unsigned imm = fieldFromInstruction16(Insn, 0, 7);
+
+ Inst.addOperand(MCOperand::CreateReg(ARM::SP));
+ Inst.addOperand(MCOperand::CreateReg(ARM::SP));
+ Inst.addOperand(MCOperand::CreateImm(imm));
+
+ return MCDisassembler::Success;
+}
+
+static DecodeStatus DecodeThumbAddSPReg(llvm::MCInst &Inst, uint16_t Insn,
+ uint64_t Address, const void *Decoder) {
+ DecodeStatus S = MCDisassembler::Success;
+
+ if (Inst.getOpcode() == ARM::tADDrSP) {
+ unsigned Rdm = fieldFromInstruction16(Insn, 0, 3);
+ Rdm |= fieldFromInstruction16(Insn, 7, 1) << 3;
+
+ if (!Check(S, DecodeGPRRegisterClass(Inst, Rdm, Address, Decoder)))
+ return MCDisassembler::Fail;
+ if (!Check(S, DecodeGPRRegisterClass(Inst, Rdm, Address, Decoder)))
+ return MCDisassembler::Fail;
+ Inst.addOperand(MCOperand::CreateReg(ARM::SP));
+ } else if (Inst.getOpcode() == ARM::tADDspr) {
+ unsigned Rm = fieldFromInstruction16(Insn, 3, 4);
+
+ Inst.addOperand(MCOperand::CreateReg(ARM::SP));
+ Inst.addOperand(MCOperand::CreateReg(ARM::SP));
+ if (!Check(S, DecodeGPRRegisterClass(Inst, Rm, Address, Decoder)))
+ return MCDisassembler::Fail;
+ }
+
+ return S;
+}
+
+static DecodeStatus DecodeThumbCPS(llvm::MCInst &Inst, uint16_t Insn,
+ uint64_t Address, const void *Decoder) {
+ unsigned imod = fieldFromInstruction16(Insn, 4, 1) | 0x2;
+ unsigned flags = fieldFromInstruction16(Insn, 0, 3);
+
+ Inst.addOperand(MCOperand::CreateImm(imod));
+ Inst.addOperand(MCOperand::CreateImm(flags));
+
+ return MCDisassembler::Success;
+}
+
+static DecodeStatus DecodePostIdxReg(llvm::MCInst &Inst, unsigned Insn,
+ uint64_t Address, const void *Decoder) {
+ DecodeStatus S = MCDisassembler::Success;
+ unsigned Rm = fieldFromInstruction32(Insn, 0, 4);
+ unsigned add = fieldFromInstruction32(Insn, 4, 1);
+
+ if (!Check(S, DecodeGPRRegisterClass(Inst, Rm, Address, Decoder)))
+ return MCDisassembler::Fail;
+ Inst.addOperand(MCOperand::CreateImm(add));
+
+ return S;
+}
+
+static DecodeStatus DecodeThumbBLXOffset(llvm::MCInst &Inst, unsigned Val,
+ uint64_t Address, const void *Decoder) {
+ if (!tryAddingSymbolicOperand(Address,
+ (Address & ~2u) + SignExtend32<22>(Val << 1) + 4,
+ true, 4, Inst, Decoder))
+ Inst.addOperand(MCOperand::CreateImm(SignExtend32<22>(Val << 1)));
+ return MCDisassembler::Success;
+}
+
+static DecodeStatus DecodeCoprocessor(llvm::MCInst &Inst, unsigned Val,
+ uint64_t Address, const void *Decoder) {
+ if (Val == 0xA || Val == 0xB)
+ return MCDisassembler::Fail;
+
+ Inst.addOperand(MCOperand::CreateImm(Val));
+ return MCDisassembler::Success;
+}
+
+static DecodeStatus
+DecodeThumbTableBranch(llvm::MCInst &Inst, unsigned Insn,
+ uint64_t Address, const void *Decoder) {
+ DecodeStatus S = MCDisassembler::Success;
+
+ unsigned Rn = fieldFromInstruction32(Insn, 16, 4);
+ unsigned Rm = fieldFromInstruction32(Insn, 0, 4);
+
+ if (Rn == ARM::SP) S = MCDisassembler::SoftFail;
+ if (!Check(S, DecodeGPRRegisterClass(Inst, Rn, Address, Decoder)))
+ return MCDisassembler::Fail;
+ if (!Check(S, DecoderGPRRegisterClass(Inst, Rm, Address, Decoder)))
+ return MCDisassembler::Fail;
+ return S;
+}
+
+static DecodeStatus
+DecodeThumb2BCCInstruction(llvm::MCInst &Inst, unsigned Insn,
+ uint64_t Address, const void *Decoder) {
+ DecodeStatus S = MCDisassembler::Success;
+
+ unsigned pred = fieldFromInstruction32(Insn, 22, 4);
+ if (pred == 0xE || pred == 0xF) {
+ unsigned opc = fieldFromInstruction32(Insn, 4, 28);
+ switch (opc) {
+ default:
+ return MCDisassembler::Fail;
+ case 0xf3bf8f4:
+ Inst.setOpcode(ARM::t2DSB);
+ break;
+ case 0xf3bf8f5:
+ Inst.setOpcode(ARM::t2DMB);
+ break;
+ case 0xf3bf8f6:
+ Inst.setOpcode(ARM::t2ISB);
+ break;
}
- // See, for example, A8.6.117 PLD,PLDW (immediate) T1 & T2, and friends for
- // why we morphed the opcode before returning it to our caller.
- if (slice(insn, 31, 25) == 0x7C && slice(insn, 15, 12) == 0xF
- && slice(insn, 22, 22) == 0 && slice(insn, 20, 20) == 1
- && slice(insn, 19, 16) == 15) {
- unsigned morphed = T2Morph2PLDLiteral(unmorphed);
- if (morphed != unmorphed)
- return morphed;
+ unsigned imm = fieldFromInstruction32(Insn, 0, 4);
+ return DecodeMemBarrierOption(Inst, imm, Address, Decoder);
+ }
+
+ unsigned brtarget = fieldFromInstruction32(Insn, 0, 11) << 1;
+ brtarget |= fieldFromInstruction32(Insn, 11, 1) << 19;
+ brtarget |= fieldFromInstruction32(Insn, 13, 1) << 18;
+ brtarget |= fieldFromInstruction32(Insn, 16, 6) << 12;
+ brtarget |= fieldFromInstruction32(Insn, 26, 1) << 20;
+
+ if (!Check(S, DecodeT2BROperand(Inst, brtarget, Address, Decoder)))
+ return MCDisassembler::Fail;
+ if (!Check(S, DecodePredicateOperand(Inst, pred, Address, Decoder)))
+ return MCDisassembler::Fail;
+
+ return S;
+}
+
+// Decode a shifted immediate operand. These basically consist
+// of an 8-bit value, and a 4-bit directive that specifies either
+// a splat operation or a rotation.
+static DecodeStatus DecodeT2SOImm(llvm::MCInst &Inst, unsigned Val,
+ uint64_t Address, const void *Decoder) {
+ unsigned ctrl = fieldFromInstruction32(Val, 10, 2);
+ if (ctrl == 0) {
+ unsigned byte = fieldFromInstruction32(Val, 8, 2);
+ unsigned imm = fieldFromInstruction32(Val, 0, 8);
+ switch (byte) {
+ case 0:
+ Inst.addOperand(MCOperand::CreateImm(imm));
+ break;
+ case 1:
+ Inst.addOperand(MCOperand::CreateImm((imm << 16) | imm));
+ break;
+ case 2:
+ Inst.addOperand(MCOperand::CreateImm((imm << 24) | (imm << 8)));
+ break;
+ case 3:
+ Inst.addOperand(MCOperand::CreateImm((imm << 24) | (imm << 16) |
+ (imm << 8) | imm));
+ break;
}
+ } else {
+ unsigned unrot = fieldFromInstruction32(Val, 0, 7) | 0x80;
+ unsigned rot = fieldFromInstruction32(Val, 7, 5);
+ unsigned imm = (unrot >> rot) | (unrot << ((32-rot)&31));
+ Inst.addOperand(MCOperand::CreateImm(imm));
+ }
- // One last check for NEON/VFP instructions.
- if ((op1 == 1 || op1 == 3) && slice(op2, 6, 6) == 1)
- return decodeInstruction(insn);
+ return MCDisassembler::Success;
+}
+
+static DecodeStatus
+DecodeThumbBCCTargetOperand(llvm::MCInst &Inst, unsigned Val,
+ uint64_t Address, const void *Decoder){
+ Inst.addOperand(MCOperand::CreateImm(Val << 1));
+ return MCDisassembler::Success;
+}
- // Fall through.
+static DecodeStatus DecodeThumbBLTargetOperand(llvm::MCInst &Inst, unsigned Val,
+ uint64_t Address, const void *Decoder){
+ Inst.addOperand(MCOperand::CreateImm(SignExtend32<22>(Val << 1)));
+ return MCDisassembler::Success;
+}
+
+static DecodeStatus DecodeMemBarrierOption(llvm::MCInst &Inst, unsigned Val,
+ uint64_t Address, const void *Decoder) {
+ switch (Val) {
+ default:
+ return MCDisassembler::Fail;
+ case 0xF: // SY
+ case 0xE: // ST
+ case 0xB: // ISH
+ case 0xA: // ISHST
+ case 0x7: // NSH
+ case 0x6: // NSHST
+ case 0x3: // OSH
+ case 0x2: // OSHST
+ break;
}
- return decodeThumbInstruction(insn);
+ Inst.addOperand(MCOperand::CreateImm(Val));
+ return MCDisassembler::Success;
}
-//
-// Public interface for the disassembler
-//
+static DecodeStatus DecodeMSRMask(llvm::MCInst &Inst, unsigned Val,
+ uint64_t Address, const void *Decoder) {
+ if (!Val) return MCDisassembler::Fail;
+ Inst.addOperand(MCOperand::CreateImm(Val));
+ return MCDisassembler::Success;
+}
-bool ARMDisassembler::getInstruction(MCInst &MI,
- uint64_t &Size,
- const MemoryObject &Region,
- uint64_t Address,
- raw_ostream &os) const {
- // The machine instruction.
- uint32_t insn;
- uint8_t bytes[4];
+static DecodeStatus DecodeDoubleRegLoad(llvm::MCInst &Inst, unsigned Insn,
+ uint64_t Address, const void *Decoder) {
+ DecodeStatus S = MCDisassembler::Success;
- // We want to read exactly 4 bytes of data.
- if (Region.readBytes(Address, 4, (uint8_t*)bytes, NULL) == -1)
- return false;
+ unsigned Rt = fieldFromInstruction32(Insn, 12, 4);
+ unsigned Rn = fieldFromInstruction32(Insn, 16, 4);
+ unsigned pred = fieldFromInstruction32(Insn, 28, 4);
- // Encoded as a small-endian 32-bit word in the stream.
- insn = (bytes[3] << 24) |
- (bytes[2] << 16) |
- (bytes[1] << 8) |
- (bytes[0] << 0);
-
- unsigned Opcode = decodeARMInstruction(insn);
- ARMFormat Format = ARMFormats[Opcode];
- Size = 4;
-
- DEBUG({
- errs() << "\nOpcode=" << Opcode << " Name=" <<ARMUtils::OpcodeName(Opcode)
- << " Format=" << stringForARMFormat(Format) << '(' << (int)Format
- << ")\n";
- showBitVector(errs(), insn);
- });
-
- OwningPtr<ARMBasicMCBuilder> Builder(CreateMCBuilder(Opcode, Format));
- if (!Builder)
- return false;
+ if ((Rt & 1) || Rt == 0xE || Rn == 0xF) return MCDisassembler::Fail;
- Builder->setupBuilderForSymbolicDisassembly(getLLVMOpInfoCallback(),
- getDisInfoBlock(), getMCContext(),
- Address);
+ if (!Check(S, DecodeGPRRegisterClass(Inst, Rt, Address, Decoder)))
+ return MCDisassembler::Fail;
+ if (!Check(S, DecodeGPRRegisterClass(Inst, Rt+1, Address, Decoder)))
+ return MCDisassembler::Fail;
+ if (!Check(S, DecodeGPRRegisterClass(Inst, Rn, Address, Decoder)))
+ return MCDisassembler::Fail;
+ if (!Check(S, DecodePredicateOperand(Inst, pred, Address, Decoder)))
+ return MCDisassembler::Fail;
- if (!Builder->Build(MI, insn))
- return false;
+ return S;
+}
- return true;
+
+static DecodeStatus DecodeDoubleRegStore(llvm::MCInst &Inst, unsigned Insn,
+ uint64_t Address, const void *Decoder){
+ DecodeStatus S = MCDisassembler::Success;
+
+ unsigned Rd = fieldFromInstruction32(Insn, 12, 4);
+ unsigned Rt = fieldFromInstruction32(Insn, 0, 4);
+ unsigned Rn = fieldFromInstruction32(Insn, 16, 4);
+ unsigned pred = fieldFromInstruction32(Insn, 28, 4);
+
+ if (!Check(S, DecoderGPRRegisterClass(Inst, Rd, Address, Decoder)))
+ return MCDisassembler::Fail;
+
+ if ((Rt & 1) || Rt == 0xE || Rn == 0xF) return MCDisassembler::Fail;
+ if (Rd == Rn || Rd == Rt || Rd == Rt+1) return MCDisassembler::Fail;
+
+ if (!Check(S, DecodeGPRRegisterClass(Inst, Rt, Address, Decoder)))
+ return MCDisassembler::Fail;
+ if (!Check(S, DecodeGPRRegisterClass(Inst, Rt+1, Address, Decoder)))
+ return MCDisassembler::Fail;
+ if (!Check(S, DecodeGPRRegisterClass(Inst, Rn, Address, Decoder)))
+ return MCDisassembler::Fail;
+ if (!Check(S, DecodePredicateOperand(Inst, pred, Address, Decoder)))
+ return MCDisassembler::Fail;
+
+ return S;
}
-bool ThumbDisassembler::getInstruction(MCInst &MI,
- uint64_t &Size,
- const MemoryObject &Region,
- uint64_t Address,
- raw_ostream &os) const {
- // The Thumb instruction stream is a sequence of halfwords.
-
- // This represents the first halfword as well as the machine instruction
- // passed to decodeThumbInstruction(). For 16-bit Thumb instruction, the top
- // halfword of insn is 0x00 0x00; otherwise, the first halfword is moved to
- // the top half followed by the second halfword.
- unsigned insn = 0;
- // Possible second halfword.
- uint16_t insn1 = 0;
-
- // A6.1 Thumb instruction set encoding
- //
- // If bits [15:11] of the halfword being decoded take any of the following
- // values, the halfword is the first halfword of a 32-bit instruction:
- // o 0b11101
- // o 0b11110
- // o 0b11111.
- //
- // Otherwise, the halfword is a 16-bit instruction.
-
- // Read 2 bytes of data first.
- uint8_t bytes[2];
- if (Region.readBytes(Address, 2, (uint8_t*)bytes, NULL) == -1)
- return false;
+static DecodeStatus DecodeLDRPreImm(llvm::MCInst &Inst, unsigned Insn,
+ uint64_t Address, const void *Decoder) {
+ DecodeStatus S = MCDisassembler::Success;
+
+ unsigned Rn = fieldFromInstruction32(Insn, 16, 4);
+ unsigned Rt = fieldFromInstruction32(Insn, 12, 4);
+ unsigned imm = fieldFromInstruction32(Insn, 0, 12);
+ imm |= fieldFromInstruction32(Insn, 16, 4) << 13;
+ imm |= fieldFromInstruction32(Insn, 23, 1) << 12;
+ unsigned pred = fieldFromInstruction32(Insn, 28, 4);
+
+ if (Rn == 0xF || Rn == Rt) S = MCDisassembler::SoftFail;
+
+ if (!Check(S, DecodeGPRRegisterClass(Inst, Rt, Address, Decoder)))
+ return MCDisassembler::Fail;
+ if (!Check(S, DecodeGPRRegisterClass(Inst, Rn, Address, Decoder)))
+ return MCDisassembler::Fail;
+ if (!Check(S, DecodeAddrModeImm12Operand(Inst, imm, Address, Decoder)))
+ return MCDisassembler::Fail;
+ if (!Check(S, DecodePredicateOperand(Inst, pred, Address, Decoder)))
+ return MCDisassembler::Fail;
+
+ return S;
+}
- // Encoded as a small-endian 16-bit halfword in the stream.
- insn = (bytes[1] << 8) | bytes[0];
- unsigned bits15_11 = slice(insn, 15, 11);
- bool IsThumb2 = false;
+static DecodeStatus DecodeLDRPreReg(llvm::MCInst &Inst, unsigned Insn,
+ uint64_t Address, const void *Decoder) {
+ DecodeStatus S = MCDisassembler::Success;
+
+ unsigned Rn = fieldFromInstruction32(Insn, 16, 4);
+ unsigned Rt = fieldFromInstruction32(Insn, 12, 4);
+ unsigned imm = fieldFromInstruction32(Insn, 0, 12);
+ imm |= fieldFromInstruction32(Insn, 16, 4) << 13;
+ imm |= fieldFromInstruction32(Insn, 23, 1) << 12;
+ unsigned pred = fieldFromInstruction32(Insn, 28, 4);
+ unsigned Rm = fieldFromInstruction32(Insn, 0, 4);
+
+ if (Rn == 0xF || Rn == Rt) S = MCDisassembler::SoftFail;
+ if (Rm == 0xF) S = MCDisassembler::SoftFail;
+
+ if (!Check(S, DecodeGPRRegisterClass(Inst, Rt, Address, Decoder)))
+ return MCDisassembler::Fail;
+ if (!Check(S, DecodeGPRRegisterClass(Inst, Rn, Address, Decoder)))
+ return MCDisassembler::Fail;
+ if (!Check(S, DecodeSORegMemOperand(Inst, imm, Address, Decoder)))
+ return MCDisassembler::Fail;
+ if (!Check(S, DecodePredicateOperand(Inst, pred, Address, Decoder)))
+ return MCDisassembler::Fail;
+
+ return S;
+}
- // 32-bit instructions if the bits [15:11] of the halfword matches
- // { 0b11101 /* 0x1D */, 0b11110 /* 0x1E */, ob11111 /* 0x1F */ }.
- if (bits15_11 == 0x1D || bits15_11 == 0x1E || bits15_11 == 0x1F) {
- IsThumb2 = true;
- if (Region.readBytes(Address + 2, 2, (uint8_t*)bytes, NULL) == -1)
- return false;
- // Encoded as a small-endian 16-bit halfword in the stream.
- insn1 = (bytes[1] << 8) | bytes[0];
- insn = (insn << 16 | insn1);
- }
-
- // The insn could potentially be bit-twiddled in order to be decoded as an ARM
- // NEON/VFP opcode. In such case, the modified insn is later disassembled as
- // an ARM NEON/VFP instruction.
- //
- // This is a short term solution for lack of encoding bits specified for the
- // Thumb2 NEON/VFP instructions. The long term solution could be adding some
- // infrastructure to have each instruction support more than one encodings.
- // Which encoding is used would be based on which subtarget the compiler/
- // disassembler is working with at the time. This would allow the sharing of
- // the NEON patterns between ARM and Thumb2, as well as potential greater
- // sharing between the regular ARM instructions and the 32-bit wide Thumb2
- // instructions as well.
- unsigned Opcode = decodeThumbSideEffect(IsThumb2, insn);
-
- ARMFormat Format = ARMFormats[Opcode];
- Size = IsThumb2 ? 4 : 2;
-
- DEBUG({
- errs() << "Opcode=" << Opcode << " Name=" << ARMUtils::OpcodeName(Opcode)
- << " Format=" << stringForARMFormat(Format) << '(' << (int)Format
- << ")\n";
- showBitVector(errs(), insn);
- });
-
- OwningPtr<ARMBasicMCBuilder> Builder(CreateMCBuilder(Opcode, Format));
- if (!Builder)
- return false;
- Builder->SetSession(const_cast<Session *>(&SO));
+static DecodeStatus DecodeSTRPreImm(llvm::MCInst &Inst, unsigned Insn,
+ uint64_t Address, const void *Decoder) {
+ DecodeStatus S = MCDisassembler::Success;
- Builder->setupBuilderForSymbolicDisassembly(getLLVMOpInfoCallback(),
- getDisInfoBlock(), getMCContext(),
- Address);
+ unsigned Rn = fieldFromInstruction32(Insn, 16, 4);
+ unsigned Rt = fieldFromInstruction32(Insn, 12, 4);
+ unsigned imm = fieldFromInstruction32(Insn, 0, 12);
+ imm |= fieldFromInstruction32(Insn, 16, 4) << 13;
+ imm |= fieldFromInstruction32(Insn, 23, 1) << 12;
+ unsigned pred = fieldFromInstruction32(Insn, 28, 4);
- if (!Builder->Build(MI, insn))
- return false;
+ if (Rn == 0xF || Rn == Rt) S = MCDisassembler::SoftFail;
- return true;
+ if (!Check(S, DecodeGPRRegisterClass(Inst, Rn, Address, Decoder)))
+ return MCDisassembler::Fail;
+ if (!Check(S, DecodeGPRRegisterClass(Inst, Rt, Address, Decoder)))
+ return MCDisassembler::Fail;
+ if (!Check(S, DecodeAddrModeImm12Operand(Inst, imm, Address, Decoder)))
+ return MCDisassembler::Fail;
+ if (!Check(S, DecodePredicateOperand(Inst, pred, Address, Decoder)))
+ return MCDisassembler::Fail;
+
+ return S;
}
-// A8.6.50
-// Valid return values are {1, 2, 3, 4}, with 0 signifying an error condition.
-static unsigned short CountITSize(unsigned ITMask) {
- // First count the trailing zeros of the IT mask.
- unsigned TZ = CountTrailingZeros_32(ITMask);
- if (TZ > 3) {
- DEBUG(errs() << "Encoding error: IT Mask '0000'");
- return 0;
+static DecodeStatus DecodeSTRPreReg(llvm::MCInst &Inst, unsigned Insn,
+ uint64_t Address, const void *Decoder) {
+ DecodeStatus S = MCDisassembler::Success;
+
+ unsigned Rn = fieldFromInstruction32(Insn, 16, 4);
+ unsigned Rt = fieldFromInstruction32(Insn, 12, 4);
+ unsigned imm = fieldFromInstruction32(Insn, 0, 12);
+ imm |= fieldFromInstruction32(Insn, 16, 4) << 13;
+ imm |= fieldFromInstruction32(Insn, 23, 1) << 12;
+ unsigned pred = fieldFromInstruction32(Insn, 28, 4);
+
+ if (Rn == 0xF || Rn == Rt) S = MCDisassembler::SoftFail;
+
+ if (!Check(S, DecodeGPRRegisterClass(Inst, Rn, Address, Decoder)))
+ return MCDisassembler::Fail;
+ if (!Check(S, DecodeGPRRegisterClass(Inst, Rt, Address, Decoder)))
+ return MCDisassembler::Fail;
+ if (!Check(S, DecodeSORegMemOperand(Inst, imm, Address, Decoder)))
+ return MCDisassembler::Fail;
+ if (!Check(S, DecodePredicateOperand(Inst, pred, Address, Decoder)))
+ return MCDisassembler::Fail;
+
+ return S;
+}
+
+static DecodeStatus DecodeVLD1LN(llvm::MCInst &Inst, unsigned Insn,
+ uint64_t Address, const void *Decoder) {
+ DecodeStatus S = MCDisassembler::Success;
+
+ unsigned Rn = fieldFromInstruction32(Insn, 16, 4);
+ unsigned Rm = fieldFromInstruction32(Insn, 0, 4);
+ unsigned Rd = fieldFromInstruction32(Insn, 12, 4);
+ Rd |= fieldFromInstruction32(Insn, 22, 1) << 4;
+ unsigned size = fieldFromInstruction32(Insn, 10, 2);
+
+ unsigned align = 0;
+ unsigned index = 0;
+ switch (size) {
+ default:
+ return MCDisassembler::Fail;
+ case 0:
+ if (fieldFromInstruction32(Insn, 4, 1))
+ return MCDisassembler::Fail; // UNDEFINED
+ index = fieldFromInstruction32(Insn, 5, 3);
+ break;
+ case 1:
+ if (fieldFromInstruction32(Insn, 5, 1))
+ return MCDisassembler::Fail; // UNDEFINED
+ index = fieldFromInstruction32(Insn, 6, 2);
+ if (fieldFromInstruction32(Insn, 4, 1))
+ align = 2;
+ break;
+ case 2:
+ if (fieldFromInstruction32(Insn, 6, 1))
+ return MCDisassembler::Fail; // UNDEFINED
+ index = fieldFromInstruction32(Insn, 7, 1);
+ if (fieldFromInstruction32(Insn, 4, 2) != 0)
+ align = 4;
+ }
+
+ if (!Check(S, DecodeDPRRegisterClass(Inst, Rd, Address, Decoder)))
+ return MCDisassembler::Fail;
+ if (Rm != 0xF) { // Writeback
+ if (!Check(S, DecodeGPRRegisterClass(Inst, Rn, Address, Decoder)))
+ return MCDisassembler::Fail;
+ }
+ if (!Check(S, DecodeGPRRegisterClass(Inst, Rn, Address, Decoder)))
+ return MCDisassembler::Fail;
+ Inst.addOperand(MCOperand::CreateImm(align));
+ if (Rm != 0xF) {
+ if (Rm != 0xD) {
+ if (!Check(S, DecodeGPRRegisterClass(Inst, Rm, Address, Decoder)))
+ return MCDisassembler::Fail;
+ } else
+ Inst.addOperand(MCOperand::CreateReg(0));
}
- return (4 - TZ);
+
+ if (!Check(S, DecodeDPRRegisterClass(Inst, Rd, Address, Decoder)))
+ return MCDisassembler::Fail;
+ Inst.addOperand(MCOperand::CreateImm(index));
+
+ return S;
}
-/// Init ITState. Note that at least one bit is always 1 in mask.
-bool Session::InitIT(unsigned short bits7_0) {
- ITCounter = CountITSize(slice(bits7_0, 3, 0));
- if (ITCounter == 0)
- return false;
+static DecodeStatus DecodeVST1LN(llvm::MCInst &Inst, unsigned Insn,
+ uint64_t Address, const void *Decoder) {
+ DecodeStatus S = MCDisassembler::Success;
- // A8.6.50 IT
- unsigned short FirstCond = slice(bits7_0, 7, 4);
- if (FirstCond == 0xF) {
- DEBUG(errs() << "Encoding error: IT FirstCond '1111'");
- return false;
+ unsigned Rn = fieldFromInstruction32(Insn, 16, 4);
+ unsigned Rm = fieldFromInstruction32(Insn, 0, 4);
+ unsigned Rd = fieldFromInstruction32(Insn, 12, 4);
+ Rd |= fieldFromInstruction32(Insn, 22, 1) << 4;
+ unsigned size = fieldFromInstruction32(Insn, 10, 2);
+
+ unsigned align = 0;
+ unsigned index = 0;
+ switch (size) {
+ default:
+ return MCDisassembler::Fail;
+ case 0:
+ if (fieldFromInstruction32(Insn, 4, 1))
+ return MCDisassembler::Fail; // UNDEFINED
+ index = fieldFromInstruction32(Insn, 5, 3);
+ break;
+ case 1:
+ if (fieldFromInstruction32(Insn, 5, 1))
+ return MCDisassembler::Fail; // UNDEFINED
+ index = fieldFromInstruction32(Insn, 6, 2);
+ if (fieldFromInstruction32(Insn, 4, 1))
+ align = 2;
+ break;
+ case 2:
+ if (fieldFromInstruction32(Insn, 6, 1))
+ return MCDisassembler::Fail; // UNDEFINED
+ index = fieldFromInstruction32(Insn, 7, 1);
+ if (fieldFromInstruction32(Insn, 4, 2) != 0)
+ align = 4;
}
- if (FirstCond == 0xE && ITCounter != 1) {
- DEBUG(errs() << "Encoding error: IT FirstCond '1110' && Mask != '1000'");
- return false;
+
+ if (Rm != 0xF) { // Writeback
+ if (!Check(S, DecodeGPRRegisterClass(Inst, Rn, Address, Decoder)))
+ return MCDisassembler::Fail;
+ }
+ if (!Check(S, DecodeGPRRegisterClass(Inst, Rn, Address, Decoder)))
+ return MCDisassembler::Fail;
+ Inst.addOperand(MCOperand::CreateImm(align));
+ if (Rm != 0xF) {
+ if (Rm != 0xD) {
+ if (!Check(S, DecodeGPRRegisterClass(Inst, Rm, Address, Decoder)))
+ return MCDisassembler::Fail;
+ } else
+ Inst.addOperand(MCOperand::CreateReg(0));
}
- ITState = bits7_0;
+ if (!Check(S, DecodeDPRRegisterClass(Inst, Rd, Address, Decoder)))
+ return MCDisassembler::Fail;
+ Inst.addOperand(MCOperand::CreateImm(index));
- return true;
+ return S;
}
-/// Update ITState if necessary.
-void Session::UpdateIT() {
- assert(ITCounter);
- --ITCounter;
- if (ITCounter == 0)
- ITState = 0;
- else {
- unsigned short NewITState4_0 = slice(ITState, 4, 0) << 1;
- setSlice(ITState, 4, 0, NewITState4_0);
+
+static DecodeStatus DecodeVLD2LN(llvm::MCInst &Inst, unsigned Insn,
+ uint64_t Address, const void *Decoder) {
+ DecodeStatus S = MCDisassembler::Success;
+
+ unsigned Rn = fieldFromInstruction32(Insn, 16, 4);
+ unsigned Rm = fieldFromInstruction32(Insn, 0, 4);
+ unsigned Rd = fieldFromInstruction32(Insn, 12, 4);
+ Rd |= fieldFromInstruction32(Insn, 22, 1) << 4;
+ unsigned size = fieldFromInstruction32(Insn, 10, 2);
+
+ unsigned align = 0;
+ unsigned index = 0;
+ unsigned inc = 1;
+ switch (size) {
+ default:
+ return MCDisassembler::Fail;
+ case 0:
+ index = fieldFromInstruction32(Insn, 5, 3);
+ if (fieldFromInstruction32(Insn, 4, 1))
+ align = 2;
+ break;
+ case 1:
+ index = fieldFromInstruction32(Insn, 6, 2);
+ if (fieldFromInstruction32(Insn, 4, 1))
+ align = 4;
+ if (fieldFromInstruction32(Insn, 5, 1))
+ inc = 2;
+ break;
+ case 2:
+ if (fieldFromInstruction32(Insn, 5, 1))
+ return MCDisassembler::Fail; // UNDEFINED
+ index = fieldFromInstruction32(Insn, 7, 1);
+ if (fieldFromInstruction32(Insn, 4, 1) != 0)
+ align = 8;
+ if (fieldFromInstruction32(Insn, 6, 1))
+ inc = 2;
+ break;
+ }
+
+ if (!Check(S, DecodeDPRRegisterClass(Inst, Rd, Address, Decoder)))
+ return MCDisassembler::Fail;
+ if (!Check(S, DecodeDPRRegisterClass(Inst, Rd+inc, Address, Decoder)))
+ return MCDisassembler::Fail;
+ if (Rm != 0xF) { // Writeback
+ if (!Check(S, DecodeGPRRegisterClass(Inst, Rn, Address, Decoder)))
+ return MCDisassembler::Fail;
+ }
+ if (!Check(S, DecodeGPRRegisterClass(Inst, Rn, Address, Decoder)))
+ return MCDisassembler::Fail;
+ Inst.addOperand(MCOperand::CreateImm(align));
+ if (Rm != 0xF) {
+ if (Rm != 0xD) {
+ if (!Check(S, DecodeGPRRegisterClass(Inst, Rm, Address, Decoder)))
+ return MCDisassembler::Fail;
+ } else
+ Inst.addOperand(MCOperand::CreateReg(0));
}
+
+ if (!Check(S, DecodeDPRRegisterClass(Inst, Rd, Address, Decoder)))
+ return MCDisassembler::Fail;
+ if (!Check(S, DecodeDPRRegisterClass(Inst, Rd+inc, Address, Decoder)))
+ return MCDisassembler::Fail;
+ Inst.addOperand(MCOperand::CreateImm(index));
+
+ return S;
}
-static MCDisassembler *createARMDisassembler(const Target &T) {
- return new ARMDisassembler;
+static DecodeStatus DecodeVST2LN(llvm::MCInst &Inst, unsigned Insn,
+ uint64_t Address, const void *Decoder) {
+ DecodeStatus S = MCDisassembler::Success;
+
+ unsigned Rn = fieldFromInstruction32(Insn, 16, 4);
+ unsigned Rm = fieldFromInstruction32(Insn, 0, 4);
+ unsigned Rd = fieldFromInstruction32(Insn, 12, 4);
+ Rd |= fieldFromInstruction32(Insn, 22, 1) << 4;
+ unsigned size = fieldFromInstruction32(Insn, 10, 2);
+
+ unsigned align = 0;
+ unsigned index = 0;
+ unsigned inc = 1;
+ switch (size) {
+ default:
+ return MCDisassembler::Fail;
+ case 0:
+ index = fieldFromInstruction32(Insn, 5, 3);
+ if (fieldFromInstruction32(Insn, 4, 1))
+ align = 2;
+ break;
+ case 1:
+ index = fieldFromInstruction32(Insn, 6, 2);
+ if (fieldFromInstruction32(Insn, 4, 1))
+ align = 4;
+ if (fieldFromInstruction32(Insn, 5, 1))
+ inc = 2;
+ break;
+ case 2:
+ if (fieldFromInstruction32(Insn, 5, 1))
+ return MCDisassembler::Fail; // UNDEFINED
+ index = fieldFromInstruction32(Insn, 7, 1);
+ if (fieldFromInstruction32(Insn, 4, 1) != 0)
+ align = 8;
+ if (fieldFromInstruction32(Insn, 6, 1))
+ inc = 2;
+ break;
+ }
+
+ if (Rm != 0xF) { // Writeback
+ if (!Check(S, DecodeGPRRegisterClass(Inst, Rn, Address, Decoder)))
+ return MCDisassembler::Fail;
+ }
+ if (!Check(S, DecodeGPRRegisterClass(Inst, Rn, Address, Decoder)))
+ return MCDisassembler::Fail;
+ Inst.addOperand(MCOperand::CreateImm(align));
+ if (Rm != 0xF) {
+ if (Rm != 0xD) {
+ if (!Check(S, DecodeGPRRegisterClass(Inst, Rm, Address, Decoder)))
+ return MCDisassembler::Fail;
+ } else
+ Inst.addOperand(MCOperand::CreateReg(0));
+ }
+
+ if (!Check(S, DecodeDPRRegisterClass(Inst, Rd, Address, Decoder)))
+ return MCDisassembler::Fail;
+ if (!Check(S, DecodeDPRRegisterClass(Inst, Rd+inc, Address, Decoder)))
+ return MCDisassembler::Fail;
+ Inst.addOperand(MCOperand::CreateImm(index));
+
+ return S;
}
-static MCDisassembler *createThumbDisassembler(const Target &T) {
- return new ThumbDisassembler;
+
+static DecodeStatus DecodeVLD3LN(llvm::MCInst &Inst, unsigned Insn,
+ uint64_t Address, const void *Decoder) {
+ DecodeStatus S = MCDisassembler::Success;
+
+ unsigned Rn = fieldFromInstruction32(Insn, 16, 4);
+ unsigned Rm = fieldFromInstruction32(Insn, 0, 4);
+ unsigned Rd = fieldFromInstruction32(Insn, 12, 4);
+ Rd |= fieldFromInstruction32(Insn, 22, 1) << 4;
+ unsigned size = fieldFromInstruction32(Insn, 10, 2);
+
+ unsigned align = 0;
+ unsigned index = 0;
+ unsigned inc = 1;
+ switch (size) {
+ default:
+ return MCDisassembler::Fail;
+ case 0:
+ if (fieldFromInstruction32(Insn, 4, 1))
+ return MCDisassembler::Fail; // UNDEFINED
+ index = fieldFromInstruction32(Insn, 5, 3);
+ break;
+ case 1:
+ if (fieldFromInstruction32(Insn, 4, 1))
+ return MCDisassembler::Fail; // UNDEFINED
+ index = fieldFromInstruction32(Insn, 6, 2);
+ if (fieldFromInstruction32(Insn, 5, 1))
+ inc = 2;
+ break;
+ case 2:
+ if (fieldFromInstruction32(Insn, 4, 2))
+ return MCDisassembler::Fail; // UNDEFINED
+ index = fieldFromInstruction32(Insn, 7, 1);
+ if (fieldFromInstruction32(Insn, 6, 1))
+ inc = 2;
+ break;
+ }
+
+ if (!Check(S, DecodeDPRRegisterClass(Inst, Rd, Address, Decoder)))
+ return MCDisassembler::Fail;
+ if (!Check(S, DecodeDPRRegisterClass(Inst, Rd+inc, Address, Decoder)))
+ return MCDisassembler::Fail;
+ if (!Check(S, DecodeDPRRegisterClass(Inst, Rd+2*inc, Address, Decoder)))
+ return MCDisassembler::Fail;
+
+ if (Rm != 0xF) { // Writeback
+ if (!Check(S, DecodeGPRRegisterClass(Inst, Rn, Address, Decoder)))
+ return MCDisassembler::Fail;
+ }
+ if (!Check(S, DecodeGPRRegisterClass(Inst, Rn, Address, Decoder)))
+ return MCDisassembler::Fail;
+ Inst.addOperand(MCOperand::CreateImm(align));
+ if (Rm != 0xF) {
+ if (Rm != 0xD) {
+ if (!Check(S, DecodeGPRRegisterClass(Inst, Rm, Address, Decoder)))
+ return MCDisassembler::Fail;
+ } else
+ Inst.addOperand(MCOperand::CreateReg(0));
+ }
+
+ if (!Check(S, DecodeDPRRegisterClass(Inst, Rd, Address, Decoder)))
+ return MCDisassembler::Fail;
+ if (!Check(S, DecodeDPRRegisterClass(Inst, Rd+inc, Address, Decoder)))
+ return MCDisassembler::Fail;
+ if (!Check(S, DecodeDPRRegisterClass(Inst, Rd+2*inc, Address, Decoder)))
+ return MCDisassembler::Fail;
+ Inst.addOperand(MCOperand::CreateImm(index));
+
+ return S;
}
-extern "C" void LLVMInitializeARMDisassembler() {
- // Register the disassembler.
- TargetRegistry::RegisterMCDisassembler(TheARMTarget,
- createARMDisassembler);
- TargetRegistry::RegisterMCDisassembler(TheThumbTarget,
- createThumbDisassembler);
+static DecodeStatus DecodeVST3LN(llvm::MCInst &Inst, unsigned Insn,
+ uint64_t Address, const void *Decoder) {
+ DecodeStatus S = MCDisassembler::Success;
+
+ unsigned Rn = fieldFromInstruction32(Insn, 16, 4);
+ unsigned Rm = fieldFromInstruction32(Insn, 0, 4);
+ unsigned Rd = fieldFromInstruction32(Insn, 12, 4);
+ Rd |= fieldFromInstruction32(Insn, 22, 1) << 4;
+ unsigned size = fieldFromInstruction32(Insn, 10, 2);
+
+ unsigned align = 0;
+ unsigned index = 0;
+ unsigned inc = 1;
+ switch (size) {
+ default:
+ return MCDisassembler::Fail;
+ case 0:
+ if (fieldFromInstruction32(Insn, 4, 1))
+ return MCDisassembler::Fail; // UNDEFINED
+ index = fieldFromInstruction32(Insn, 5, 3);
+ break;
+ case 1:
+ if (fieldFromInstruction32(Insn, 4, 1))
+ return MCDisassembler::Fail; // UNDEFINED
+ index = fieldFromInstruction32(Insn, 6, 2);
+ if (fieldFromInstruction32(Insn, 5, 1))
+ inc = 2;
+ break;
+ case 2:
+ if (fieldFromInstruction32(Insn, 4, 2))
+ return MCDisassembler::Fail; // UNDEFINED
+ index = fieldFromInstruction32(Insn, 7, 1);
+ if (fieldFromInstruction32(Insn, 6, 1))
+ inc = 2;
+ break;
+ }
+
+ if (Rm != 0xF) { // Writeback
+ if (!Check(S, DecodeGPRRegisterClass(Inst, Rn, Address, Decoder)))
+ return MCDisassembler::Fail;
+ }
+ if (!Check(S, DecodeGPRRegisterClass(Inst, Rn, Address, Decoder)))
+ return MCDisassembler::Fail;
+ Inst.addOperand(MCOperand::CreateImm(align));
+ if (Rm != 0xF) {
+ if (Rm != 0xD) {
+ if (!Check(S, DecodeGPRRegisterClass(Inst, Rm, Address, Decoder)))
+ return MCDisassembler::Fail;
+ } else
+ Inst.addOperand(MCOperand::CreateReg(0));
+ }
+
+ if (!Check(S, DecodeDPRRegisterClass(Inst, Rd, Address, Decoder)))
+ return MCDisassembler::Fail;
+ if (!Check(S, DecodeDPRRegisterClass(Inst, Rd+inc, Address, Decoder)))
+ return MCDisassembler::Fail;
+ if (!Check(S, DecodeDPRRegisterClass(Inst, Rd+2*inc, Address, Decoder)))
+ return MCDisassembler::Fail;
+ Inst.addOperand(MCOperand::CreateImm(index));
+
+ return S;
}
-EDInstInfo *ARMDisassembler::getEDInfo() const {
- return instInfoARM;
+
+static DecodeStatus DecodeVLD4LN(llvm::MCInst &Inst, unsigned Insn,
+ uint64_t Address, const void *Decoder) {
+ DecodeStatus S = MCDisassembler::Success;
+
+ unsigned Rn = fieldFromInstruction32(Insn, 16, 4);
+ unsigned Rm = fieldFromInstruction32(Insn, 0, 4);
+ unsigned Rd = fieldFromInstruction32(Insn, 12, 4);
+ Rd |= fieldFromInstruction32(Insn, 22, 1) << 4;
+ unsigned size = fieldFromInstruction32(Insn, 10, 2);
+
+ unsigned align = 0;
+ unsigned index = 0;
+ unsigned inc = 1;
+ switch (size) {
+ default:
+ return MCDisassembler::Fail;
+ case 0:
+ if (fieldFromInstruction32(Insn, 4, 1))
+ align = 4;
+ index = fieldFromInstruction32(Insn, 5, 3);
+ break;
+ case 1:
+ if (fieldFromInstruction32(Insn, 4, 1))
+ align = 8;
+ index = fieldFromInstruction32(Insn, 6, 2);
+ if (fieldFromInstruction32(Insn, 5, 1))
+ inc = 2;
+ break;
+ case 2:
+ if (fieldFromInstruction32(Insn, 4, 2))
+ align = 4 << fieldFromInstruction32(Insn, 4, 2);
+ index = fieldFromInstruction32(Insn, 7, 1);
+ if (fieldFromInstruction32(Insn, 6, 1))
+ inc = 2;
+ break;
+ }
+
+ if (!Check(S, DecodeDPRRegisterClass(Inst, Rd, Address, Decoder)))
+ return MCDisassembler::Fail;
+ if (!Check(S, DecodeDPRRegisterClass(Inst, Rd+inc, Address, Decoder)))
+ return MCDisassembler::Fail;
+ if (!Check(S, DecodeDPRRegisterClass(Inst, Rd+2*inc, Address, Decoder)))
+ return MCDisassembler::Fail;
+ if (!Check(S, DecodeDPRRegisterClass(Inst, Rd+3*inc, Address, Decoder)))
+ return MCDisassembler::Fail;
+
+ if (Rm != 0xF) { // Writeback
+ if (!Check(S, DecodeGPRRegisterClass(Inst, Rn, Address, Decoder)))
+ return MCDisassembler::Fail;
+ }
+ if (!Check(S, DecodeGPRRegisterClass(Inst, Rn, Address, Decoder)))
+ return MCDisassembler::Fail;
+ Inst.addOperand(MCOperand::CreateImm(align));
+ if (Rm != 0xF) {
+ if (Rm != 0xD) {
+ if (!Check(S, DecodeGPRRegisterClass(Inst, Rm, Address, Decoder)))
+ return MCDisassembler::Fail;
+ } else
+ Inst.addOperand(MCOperand::CreateReg(0));
+ }
+
+ if (!Check(S, DecodeDPRRegisterClass(Inst, Rd, Address, Decoder)))
+ return MCDisassembler::Fail;
+ if (!Check(S, DecodeDPRRegisterClass(Inst, Rd+inc, Address, Decoder)))
+ return MCDisassembler::Fail;
+ if (!Check(S, DecodeDPRRegisterClass(Inst, Rd+2*inc, Address, Decoder)))
+ return MCDisassembler::Fail;
+ if (!Check(S, DecodeDPRRegisterClass(Inst, Rd+3*inc, Address, Decoder)))
+ return MCDisassembler::Fail;
+ Inst.addOperand(MCOperand::CreateImm(index));
+
+ return S;
}
-EDInstInfo *ThumbDisassembler::getEDInfo() const {
- return instInfoARM;
+static DecodeStatus DecodeVST4LN(llvm::MCInst &Inst, unsigned Insn,
+ uint64_t Address, const void *Decoder) {
+ DecodeStatus S = MCDisassembler::Success;
+
+ unsigned Rn = fieldFromInstruction32(Insn, 16, 4);
+ unsigned Rm = fieldFromInstruction32(Insn, 0, 4);
+ unsigned Rd = fieldFromInstruction32(Insn, 12, 4);
+ Rd |= fieldFromInstruction32(Insn, 22, 1) << 4;
+ unsigned size = fieldFromInstruction32(Insn, 10, 2);
+
+ unsigned align = 0;
+ unsigned index = 0;
+ unsigned inc = 1;
+ switch (size) {
+ default:
+ return MCDisassembler::Fail;
+ case 0:
+ if (fieldFromInstruction32(Insn, 4, 1))
+ align = 4;
+ index = fieldFromInstruction32(Insn, 5, 3);
+ break;
+ case 1:
+ if (fieldFromInstruction32(Insn, 4, 1))
+ align = 8;
+ index = fieldFromInstruction32(Insn, 6, 2);
+ if (fieldFromInstruction32(Insn, 5, 1))
+ inc = 2;
+ break;
+ case 2:
+ if (fieldFromInstruction32(Insn, 4, 2))
+ align = 4 << fieldFromInstruction32(Insn, 4, 2);
+ index = fieldFromInstruction32(Insn, 7, 1);
+ if (fieldFromInstruction32(Insn, 6, 1))
+ inc = 2;
+ break;
+ }
+
+ if (Rm != 0xF) { // Writeback
+ if (!Check(S, DecodeGPRRegisterClass(Inst, Rn, Address, Decoder)))
+ return MCDisassembler::Fail;
+ }
+ if (!Check(S, DecodeGPRRegisterClass(Inst, Rn, Address, Decoder)))
+ return MCDisassembler::Fail;
+ Inst.addOperand(MCOperand::CreateImm(align));
+ if (Rm != 0xF) {
+ if (Rm != 0xD) {
+ if (!Check(S, DecodeGPRRegisterClass(Inst, Rm, Address, Decoder)))
+ return MCDisassembler::Fail;
+ } else
+ Inst.addOperand(MCOperand::CreateReg(0));
+ }
+
+ if (!Check(S, DecodeDPRRegisterClass(Inst, Rd, Address, Decoder)))
+ return MCDisassembler::Fail;
+ if (!Check(S, DecodeDPRRegisterClass(Inst, Rd+inc, Address, Decoder)))
+ return MCDisassembler::Fail;
+ if (!Check(S, DecodeDPRRegisterClass(Inst, Rd+2*inc, Address, Decoder)))
+ return MCDisassembler::Fail;
+ if (!Check(S, DecodeDPRRegisterClass(Inst, Rd+3*inc, Address, Decoder)))
+ return MCDisassembler::Fail;
+ Inst.addOperand(MCOperand::CreateImm(index));
+
+ return S;
+}
+
+static DecodeStatus DecodeVMOVSRR(llvm::MCInst &Inst, unsigned Insn,
+ uint64_t Address, const void *Decoder) {
+ DecodeStatus S = MCDisassembler::Success;
+ unsigned Rt = fieldFromInstruction32(Insn, 12, 4);
+ unsigned Rt2 = fieldFromInstruction32(Insn, 16, 4);
+ unsigned Rm = fieldFromInstruction32(Insn, 0, 4);
+ unsigned pred = fieldFromInstruction32(Insn, 28, 4);
+ Rm |= fieldFromInstruction32(Insn, 5, 1) << 4;
+
+ if (Rt == 0xF || Rt2 == 0xF || Rm == 0x1F)
+ S = MCDisassembler::SoftFail;
+
+ if (!Check(S, DecodeSPRRegisterClass(Inst, Rm , Address, Decoder)))
+ return MCDisassembler::Fail;
+ if (!Check(S, DecodeSPRRegisterClass(Inst, Rm+1, Address, Decoder)))
+ return MCDisassembler::Fail;
+ if (!Check(S, DecodeGPRRegisterClass(Inst, Rt , Address, Decoder)))
+ return MCDisassembler::Fail;
+ if (!Check(S, DecodeGPRRegisterClass(Inst, Rt2 , Address, Decoder)))
+ return MCDisassembler::Fail;
+ if (!Check(S, DecodePredicateOperand(Inst, pred, Address, Decoder)))
+ return MCDisassembler::Fail;
+
+ return S;
+}
+
+static DecodeStatus DecodeVMOVRRS(llvm::MCInst &Inst, unsigned Insn,
+ uint64_t Address, const void *Decoder) {
+ DecodeStatus S = MCDisassembler::Success;
+ unsigned Rt = fieldFromInstruction32(Insn, 12, 4);
+ unsigned Rt2 = fieldFromInstruction32(Insn, 16, 4);
+ unsigned Rm = fieldFromInstruction32(Insn, 0, 4);
+ unsigned pred = fieldFromInstruction32(Insn, 28, 4);
+ Rm |= fieldFromInstruction32(Insn, 5, 1) << 4;
+
+ if (Rt == 0xF || Rt2 == 0xF || Rm == 0x1F)
+ S = MCDisassembler::SoftFail;
+
+ if (!Check(S, DecodeGPRRegisterClass(Inst, Rt , Address, Decoder)))
+ return MCDisassembler::Fail;
+ if (!Check(S, DecodeGPRRegisterClass(Inst, Rt2 , Address, Decoder)))
+ return MCDisassembler::Fail;
+ if (!Check(S, DecodeSPRRegisterClass(Inst, Rm , Address, Decoder)))
+ return MCDisassembler::Fail;
+ if (!Check(S, DecodeSPRRegisterClass(Inst, Rm+1, Address, Decoder)))
+ return MCDisassembler::Fail;
+ if (!Check(S, DecodePredicateOperand(Inst, pred, Address, Decoder)))
+ return MCDisassembler::Fail;
+
+ return S;
+}
+
+static DecodeStatus DecodeIT(llvm::MCInst &Inst, unsigned Insn,
+ uint64_t Address, const void *Decoder) {
+ DecodeStatus S = MCDisassembler::Success;
+ unsigned pred = fieldFromInstruction16(Insn, 4, 4);
+ // The InstPrinter needs to have the low bit of the predicate in
+ // the mask operand to be able to print it properly.
+ unsigned mask = fieldFromInstruction16(Insn, 0, 5);
+
+ if (pred == 0xF) {
+ pred = 0xE;
+ S = MCDisassembler::SoftFail;
+ }
+
+ if ((mask & 0xF) == 0) {
+ // Preserve the high bit of the mask, which is the low bit of
+ // the predicate.
+ mask &= 0x10;
+ mask |= 0x8;
+ S = MCDisassembler::SoftFail;
+ }
+
+ Inst.addOperand(MCOperand::CreateImm(pred));
+ Inst.addOperand(MCOperand::CreateImm(mask));
+ return S;
}
+
+static DecodeStatus
+DecodeT2LDRDPreInstruction(llvm::MCInst &Inst, unsigned Insn,
+ uint64_t Address, const void *Decoder) {
+ DecodeStatus S = MCDisassembler::Success;
+
+ unsigned Rt = fieldFromInstruction32(Insn, 12, 4);
+ unsigned Rt2 = fieldFromInstruction32(Insn, 8, 4);
+ unsigned Rn = fieldFromInstruction32(Insn, 16, 4);
+ unsigned addr = fieldFromInstruction32(Insn, 0, 8);
+ unsigned W = fieldFromInstruction32(Insn, 21, 1);
+ unsigned U = fieldFromInstruction32(Insn, 23, 1);
+ unsigned P = fieldFromInstruction32(Insn, 24, 1);
+ bool writeback = (W == 1) | (P == 0);
+
+ addr |= (U << 8) | (Rn << 9);
+
+ if (writeback && (Rn == Rt || Rn == Rt2))
+ Check(S, MCDisassembler::SoftFail);
+ if (Rt == Rt2)
+ Check(S, MCDisassembler::SoftFail);
+
+ // Rt
+ if (!Check(S, DecoderGPRRegisterClass(Inst, Rt, Address, Decoder)))
+ return MCDisassembler::Fail;
+ // Rt2
+ if (!Check(S, DecoderGPRRegisterClass(Inst, Rt2, Address, Decoder)))
+ return MCDisassembler::Fail;
+ // Writeback operand
+ if (!Check(S, DecoderGPRRegisterClass(Inst, Rn, Address, Decoder)))
+ return MCDisassembler::Fail;
+ // addr
+ if (!Check(S, DecodeT2AddrModeImm8s4(Inst, addr, Address, Decoder)))
+ return MCDisassembler::Fail;
+
+ return S;
+}
+
+static DecodeStatus
+DecodeT2STRDPreInstruction(llvm::MCInst &Inst, unsigned Insn,
+ uint64_t Address, const void *Decoder) {
+ DecodeStatus S = MCDisassembler::Success;
+
+ unsigned Rt = fieldFromInstruction32(Insn, 12, 4);
+ unsigned Rt2 = fieldFromInstruction32(Insn, 8, 4);
+ unsigned Rn = fieldFromInstruction32(Insn, 16, 4);
+ unsigned addr = fieldFromInstruction32(Insn, 0, 8);
+ unsigned W = fieldFromInstruction32(Insn, 21, 1);
+ unsigned U = fieldFromInstruction32(Insn, 23, 1);
+ unsigned P = fieldFromInstruction32(Insn, 24, 1);
+ bool writeback = (W == 1) | (P == 0);
+
+ addr |= (U << 8) | (Rn << 9);
+
+ if (writeback && (Rn == Rt || Rn == Rt2))
+ Check(S, MCDisassembler::SoftFail);
+
+ // Writeback operand
+ if (!Check(S, DecoderGPRRegisterClass(Inst, Rn, Address, Decoder)))
+ return MCDisassembler::Fail;
+ // Rt
+ if (!Check(S, DecoderGPRRegisterClass(Inst, Rt, Address, Decoder)))
+ return MCDisassembler::Fail;
+ // Rt2
+ if (!Check(S, DecoderGPRRegisterClass(Inst, Rt2, Address, Decoder)))
+ return MCDisassembler::Fail;
+ // addr
+ if (!Check(S, DecodeT2AddrModeImm8s4(Inst, addr, Address, Decoder)))
+ return MCDisassembler::Fail;
+
+ return S;
+}
+
+static DecodeStatus DecodeT2Adr(llvm::MCInst &Inst, uint32_t Insn,
+ uint64_t Address, const void *Decoder) {
+ unsigned sign1 = fieldFromInstruction32(Insn, 21, 1);
+ unsigned sign2 = fieldFromInstruction32(Insn, 23, 1);
+ if (sign1 != sign2) return MCDisassembler::Fail;
+
+ unsigned Val = fieldFromInstruction32(Insn, 0, 8);
+ Val |= fieldFromInstruction32(Insn, 12, 3) << 8;
+ Val |= fieldFromInstruction32(Insn, 26, 1) << 11;
+ Val |= sign1 << 12;
+ Inst.addOperand(MCOperand::CreateImm(SignExtend32<13>(Val)));
+
+ return MCDisassembler::Success;
+}
+
+static DecodeStatus DecodeT2ShifterImmOperand(llvm::MCInst &Inst, uint32_t Val,
+ uint64_t Address,
+ const void *Decoder) {
+ DecodeStatus S = MCDisassembler::Success;
+
+ // Shift of "asr #32" is not allowed in Thumb2 mode.
+ if (Val == 0x20) S = MCDisassembler::SoftFail;
+ Inst.addOperand(MCOperand::CreateImm(Val));
+ return S;
+}
+
diff --git a/contrib/llvm/lib/Target/ARM/Disassembler/ARMDisassembler.h b/contrib/llvm/lib/Target/ARM/Disassembler/ARMDisassembler.h
deleted file mode 100644
index 0a74a38..0000000
--- a/contrib/llvm/lib/Target/ARM/Disassembler/ARMDisassembler.h
+++ /dev/null
@@ -1,99 +0,0 @@
-//===- ARMDisassembler.h - Disassembler for ARM/Thumb ISA -------*- C++ -*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file is part of the ARM Disassembler.
-// It contains the header for ARMDisassembler and ThumbDisassembler, both are
-// subclasses of MCDisassembler.
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef ARMDISASSEMBLER_H
-#define ARMDISASSEMBLER_H
-
-#include "llvm/MC/MCDisassembler.h"
-
-namespace llvm {
-
-class MCInst;
-class MemoryObject;
-class raw_ostream;
-
-struct EDInstInfo;
-
-/// ARMDisassembler - ARM disassembler for all ARM platforms.
-class ARMDisassembler : public MCDisassembler {
-public:
- /// Constructor - Initializes the disassembler.
- ///
- ARMDisassembler() :
- MCDisassembler() {
- }
-
- ~ARMDisassembler() {
- }
-
- /// getInstruction - See MCDisassembler.
- bool getInstruction(MCInst &instr,
- uint64_t &size,
- const MemoryObject &region,
- uint64_t address,
- raw_ostream &vStream) const;
-
- /// getEDInfo - See MCDisassembler.
- EDInstInfo *getEDInfo() const;
-private:
-};
-
-// Forward declaration.
-class ARMBasicMCBuilder;
-
-/// Session - Keep track of the IT Block progression.
-class Session {
- friend class ARMBasicMCBuilder;
-public:
- Session() : ITCounter(0), ITState(0) {}
- ~Session() {}
- /// InitIT - Initializes ITCounter/ITState.
- bool InitIT(unsigned short bits7_0);
- /// UpdateIT - Updates ITCounter/ITState as IT Block progresses.
- void UpdateIT();
-
-private:
- unsigned ITCounter; // Possible values: 0, 1, 2, 3, 4.
- unsigned ITState; // A2.5.2 Consists of IT[7:5] and IT[4:0] initially.
-};
-
-/// ThumbDisassembler - Thumb disassembler for all ARM platforms.
-class ThumbDisassembler : public MCDisassembler {
-public:
- /// Constructor - Initializes the disassembler.
- ///
- ThumbDisassembler() :
- MCDisassembler(), SO() {
- }
-
- ~ThumbDisassembler() {
- }
-
- /// getInstruction - See MCDisassembler.
- bool getInstruction(MCInst &instr,
- uint64_t &size,
- const MemoryObject &region,
- uint64_t address,
- raw_ostream &vStream) const;
-
- /// getEDInfo - See MCDisassembler.
- EDInstInfo *getEDInfo() const;
-private:
- Session SO;
-};
-
-} // namespace llvm
-
-#endif
diff --git a/contrib/llvm/lib/Target/ARM/Disassembler/ARMDisassemblerCore.cpp b/contrib/llvm/lib/Target/ARM/Disassembler/ARMDisassemblerCore.cpp
deleted file mode 100644
index d89c80a..0000000
--- a/contrib/llvm/lib/Target/ARM/Disassembler/ARMDisassemblerCore.cpp
+++ /dev/null
@@ -1,3818 +0,0 @@
-//===- ARMDisassemblerCore.cpp - ARM disassembler helpers -------*- C++ -*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file is part of the ARM Disassembler.
-// It contains code to represent the core concepts of Builder and DisassembleFP
-// to solve the problem of disassembling an ARM instr.
-//
-//===----------------------------------------------------------------------===//
-
-#define DEBUG_TYPE "arm-disassembler"
-
-#include "ARMDisassemblerCore.h"
-#include "ARMAddressingModes.h"
-#include "ARMMCExpr.h"
-#include "llvm/Support/Debug.h"
-#include "llvm/Support/raw_ostream.h"
-
-//#define DEBUG(X) do { X; } while (0)
-
-/// ARMGenInstrInfo.inc - ARMGenInstrInfo.inc contains the static const
-/// MCInstrDesc ARMInsts[] definition and the MCOperandInfo[]'s describing the
-/// operand info for each ARMInsts[i].
-///
-/// Together with an instruction's encoding format, we can take advantage of the
-/// NumOperands and the OpInfo fields of the target instruction description in
-/// the quest to build out the MCOperand list for an MCInst.
-///
-/// The general guideline is that with a known format, the number of dst and src
-/// operands are well-known. The dst is built first, followed by the src
-/// operand(s). The operands not yet used at this point are for the Implicit
-/// Uses and Defs by this instr. For the Uses part, the pred:$p operand is
-/// defined with two components:
-///
-/// def pred { // Operand PredicateOperand
-/// ValueType Type = OtherVT;
-/// string PrintMethod = "printPredicateOperand";
-/// string AsmOperandLowerMethod = ?;
-/// dag MIOperandInfo = (ops i32imm, CCR);
-/// AsmOperandClass ParserMatchClass = ImmAsmOperand;
-/// dag DefaultOps = (ops (i32 14), (i32 zero_reg));
-/// }
-///
-/// which is manifested by the MCOperandInfo[] of:
-///
-/// { 0, 0|(1<<MCOI::Predicate), 0 },
-/// { ARM::CCRRegClassID, 0|(1<<MCOI::Predicate), 0 }
-///
-/// So the first predicate MCOperand corresponds to the immediate part of the
-/// ARM condition field (Inst{31-28}), and the second predicate MCOperand
-/// corresponds to a register kind of ARM::CPSR.
-///
-/// For the Defs part, in the simple case of only cc_out:$s, we have:
-///
-/// def cc_out { // Operand OptionalDefOperand
-/// ValueType Type = OtherVT;
-/// string PrintMethod = "printSBitModifierOperand";
-/// string AsmOperandLowerMethod = ?;
-/// dag MIOperandInfo = (ops CCR);
-/// AsmOperandClass ParserMatchClass = ImmAsmOperand;
-/// dag DefaultOps = (ops (i32 zero_reg));
-/// }
-///
-/// which is manifested by the one MCOperandInfo of:
-///
-/// { ARM::CCRRegClassID, 0|(1<<MCOI::OptionalDef), 0 }
-///
-
-namespace llvm {
-extern MCInstrDesc ARMInsts[];
-}
-
-using namespace llvm;
-
-const char *ARMUtils::OpcodeName(unsigned Opcode) {
- return ARMInsts[Opcode].Name;
-}
-
-// Return the register enum Based on RegClass and the raw register number.
-// FIXME: Auto-gened?
-static unsigned
-getRegisterEnum(BO B, unsigned RegClassID, unsigned RawRegister) {
- if (RegClassID == ARM::rGPRRegClassID) {
- // Check for The register numbers 13 and 15 that are not permitted for many
- // Thumb register specifiers.
- if (RawRegister == 13 || RawRegister == 15) {
- B->SetErr(-1);
- return 0;
- }
- // For this purpose, we can treat rGPR as if it were GPR.
- RegClassID = ARM::GPRRegClassID;
- }
-
- // See also decodeNEONRd(), decodeNEONRn(), decodeNEONRm().
- // A7.3 register encoding
- // Qd -> bit[12] == 0
- // Qn -> bit[16] == 0
- // Qm -> bit[0] == 0
- //
- // If one of these bits is 1, the instruction is UNDEFINED.
- if (RegClassID == ARM::QPRRegClassID && slice(RawRegister, 0, 0) == 1) {
- B->SetErr(-1);
- return 0;
- }
- unsigned RegNum =
- RegClassID == ARM::QPRRegClassID ? RawRegister >> 1 : RawRegister;
-
- switch (RegNum) {
- default:
- break;
- case 0:
- switch (RegClassID) {
- case ARM::GPRRegClassID: case ARM::tGPRRegClassID: return ARM::R0;
- case ARM::DPRRegClassID: case ARM::DPR_8RegClassID:
- case ARM::DPR_VFP2RegClassID:
- return ARM::D0;
- case ARM::QPRRegClassID: case ARM::QPR_8RegClassID:
- case ARM::QPR_VFP2RegClassID:
- return ARM::Q0;
- case ARM::SPRRegClassID: case ARM::SPR_8RegClassID: return ARM::S0;
- }
- break;
- case 1:
- switch (RegClassID) {
- case ARM::GPRRegClassID: case ARM::tGPRRegClassID: return ARM::R1;
- case ARM::DPRRegClassID: case ARM::DPR_8RegClassID:
- case ARM::DPR_VFP2RegClassID:
- return ARM::D1;
- case ARM::QPRRegClassID: case ARM::QPR_8RegClassID:
- case ARM::QPR_VFP2RegClassID:
- return ARM::Q1;
- case ARM::SPRRegClassID: case ARM::SPR_8RegClassID: return ARM::S1;
- }
- break;
- case 2:
- switch (RegClassID) {
- case ARM::GPRRegClassID: case ARM::tGPRRegClassID: return ARM::R2;
- case ARM::DPRRegClassID: case ARM::DPR_8RegClassID:
- case ARM::DPR_VFP2RegClassID:
- return ARM::D2;
- case ARM::QPRRegClassID: case ARM::QPR_8RegClassID:
- case ARM::QPR_VFP2RegClassID:
- return ARM::Q2;
- case ARM::SPRRegClassID: case ARM::SPR_8RegClassID: return ARM::S2;
- }
- break;
- case 3:
- switch (RegClassID) {
- case ARM::GPRRegClassID: case ARM::tGPRRegClassID: return ARM::R3;
- case ARM::DPRRegClassID: case ARM::DPR_8RegClassID:
- case ARM::DPR_VFP2RegClassID:
- return ARM::D3;
- case ARM::QPRRegClassID: case ARM::QPR_8RegClassID:
- case ARM::QPR_VFP2RegClassID:
- return ARM::Q3;
- case ARM::SPRRegClassID: case ARM::SPR_8RegClassID: return ARM::S3;
- }
- break;
- case 4:
- switch (RegClassID) {
- case ARM::GPRRegClassID: case ARM::tGPRRegClassID: return ARM::R4;
- case ARM::DPRRegClassID: case ARM::DPR_8RegClassID:
- case ARM::DPR_VFP2RegClassID:
- return ARM::D4;
- case ARM::QPRRegClassID: case ARM::QPR_VFP2RegClassID: return ARM::Q4;
- case ARM::SPRRegClassID: case ARM::SPR_8RegClassID: return ARM::S4;
- }
- break;
- case 5:
- switch (RegClassID) {
- case ARM::GPRRegClassID: case ARM::tGPRRegClassID: return ARM::R5;
- case ARM::DPRRegClassID: case ARM::DPR_8RegClassID:
- case ARM::DPR_VFP2RegClassID:
- return ARM::D5;
- case ARM::QPRRegClassID: case ARM::QPR_VFP2RegClassID: return ARM::Q5;
- case ARM::SPRRegClassID: case ARM::SPR_8RegClassID: return ARM::S5;
- }
- break;
- case 6:
- switch (RegClassID) {
- case ARM::GPRRegClassID: case ARM::tGPRRegClassID: return ARM::R6;
- case ARM::DPRRegClassID: case ARM::DPR_8RegClassID:
- case ARM::DPR_VFP2RegClassID:
- return ARM::D6;
- case ARM::QPRRegClassID: case ARM::QPR_VFP2RegClassID: return ARM::Q6;
- case ARM::SPRRegClassID: case ARM::SPR_8RegClassID: return ARM::S6;
- }
- break;
- case 7:
- switch (RegClassID) {
- case ARM::GPRRegClassID: case ARM::tGPRRegClassID: return ARM::R7;
- case ARM::DPRRegClassID: case ARM::DPR_8RegClassID:
- case ARM::DPR_VFP2RegClassID:
- return ARM::D7;
- case ARM::QPRRegClassID: case ARM::QPR_VFP2RegClassID: return ARM::Q7;
- case ARM::SPRRegClassID: case ARM::SPR_8RegClassID: return ARM::S7;
- }
- break;
- case 8:
- switch (RegClassID) {
- case ARM::GPRRegClassID: return ARM::R8;
- case ARM::DPRRegClassID: case ARM::DPR_VFP2RegClassID: return ARM::D8;
- case ARM::QPRRegClassID: return ARM::Q8;
- case ARM::SPRRegClassID: case ARM::SPR_8RegClassID: return ARM::S8;
- }
- break;
- case 9:
- switch (RegClassID) {
- case ARM::GPRRegClassID: return ARM::R9;
- case ARM::DPRRegClassID: case ARM::DPR_VFP2RegClassID: return ARM::D9;
- case ARM::QPRRegClassID: return ARM::Q9;
- case ARM::SPRRegClassID: case ARM::SPR_8RegClassID: return ARM::S9;
- }
- break;
- case 10:
- switch (RegClassID) {
- case ARM::GPRRegClassID: return ARM::R10;
- case ARM::DPRRegClassID: case ARM::DPR_VFP2RegClassID: return ARM::D10;
- case ARM::QPRRegClassID: return ARM::Q10;
- case ARM::SPRRegClassID: case ARM::SPR_8RegClassID: return ARM::S10;
- }
- break;
- case 11:
- switch (RegClassID) {
- case ARM::GPRRegClassID: return ARM::R11;
- case ARM::DPRRegClassID: case ARM::DPR_VFP2RegClassID: return ARM::D11;
- case ARM::QPRRegClassID: return ARM::Q11;
- case ARM::SPRRegClassID: case ARM::SPR_8RegClassID: return ARM::S11;
- }
- break;
- case 12:
- switch (RegClassID) {
- case ARM::GPRRegClassID: return ARM::R12;
- case ARM::DPRRegClassID: case ARM::DPR_VFP2RegClassID: return ARM::D12;
- case ARM::QPRRegClassID: return ARM::Q12;
- case ARM::SPRRegClassID: case ARM::SPR_8RegClassID: return ARM::S12;
- }
- break;
- case 13:
- switch (RegClassID) {
- case ARM::GPRRegClassID: return ARM::SP;
- case ARM::DPRRegClassID: case ARM::DPR_VFP2RegClassID: return ARM::D13;
- case ARM::QPRRegClassID: return ARM::Q13;
- case ARM::SPRRegClassID: case ARM::SPR_8RegClassID: return ARM::S13;
- }
- break;
- case 14:
- switch (RegClassID) {
- case ARM::GPRRegClassID: return ARM::LR;
- case ARM::DPRRegClassID: case ARM::DPR_VFP2RegClassID: return ARM::D14;
- case ARM::QPRRegClassID: return ARM::Q14;
- case ARM::SPRRegClassID: case ARM::SPR_8RegClassID: return ARM::S14;
- }
- break;
- case 15:
- switch (RegClassID) {
- case ARM::GPRRegClassID: return ARM::PC;
- case ARM::DPRRegClassID: case ARM::DPR_VFP2RegClassID: return ARM::D15;
- case ARM::QPRRegClassID: return ARM::Q15;
- case ARM::SPRRegClassID: case ARM::SPR_8RegClassID: return ARM::S15;
- }
- break;
- case 16:
- switch (RegClassID) {
- case ARM::DPRRegClassID: return ARM::D16;
- case ARM::SPRRegClassID: return ARM::S16;
- }
- break;
- case 17:
- switch (RegClassID) {
- case ARM::DPRRegClassID: return ARM::D17;
- case ARM::SPRRegClassID: return ARM::S17;
- }
- break;
- case 18:
- switch (RegClassID) {
- case ARM::DPRRegClassID: return ARM::D18;
- case ARM::SPRRegClassID: return ARM::S18;
- }
- break;
- case 19:
- switch (RegClassID) {
- case ARM::DPRRegClassID: return ARM::D19;
- case ARM::SPRRegClassID: return ARM::S19;
- }
- break;
- case 20:
- switch (RegClassID) {
- case ARM::DPRRegClassID: return ARM::D20;
- case ARM::SPRRegClassID: return ARM::S20;
- }
- break;
- case 21:
- switch (RegClassID) {
- case ARM::DPRRegClassID: return ARM::D21;
- case ARM::SPRRegClassID: return ARM::S21;
- }
- break;
- case 22:
- switch (RegClassID) {
- case ARM::DPRRegClassID: return ARM::D22;
- case ARM::SPRRegClassID: return ARM::S22;
- }
- break;
- case 23:
- switch (RegClassID) {
- case ARM::DPRRegClassID: return ARM::D23;
- case ARM::SPRRegClassID: return ARM::S23;
- }
- break;
- case 24:
- switch (RegClassID) {
- case ARM::DPRRegClassID: return ARM::D24;
- case ARM::SPRRegClassID: return ARM::S24;
- }
- break;
- case 25:
- switch (RegClassID) {
- case ARM::DPRRegClassID: return ARM::D25;
- case ARM::SPRRegClassID: return ARM::S25;
- }
- break;
- case 26:
- switch (RegClassID) {
- case ARM::DPRRegClassID: return ARM::D26;
- case ARM::SPRRegClassID: return ARM::S26;
- }
- break;
- case 27:
- switch (RegClassID) {
- case ARM::DPRRegClassID: return ARM::D27;
- case ARM::SPRRegClassID: return ARM::S27;
- }
- break;
- case 28:
- switch (RegClassID) {
- case ARM::DPRRegClassID: return ARM::D28;
- case ARM::SPRRegClassID: return ARM::S28;
- }
- break;
- case 29:
- switch (RegClassID) {
- case ARM::DPRRegClassID: return ARM::D29;
- case ARM::SPRRegClassID: return ARM::S29;
- }
- break;
- case 30:
- switch (RegClassID) {
- case ARM::DPRRegClassID: return ARM::D30;
- case ARM::SPRRegClassID: return ARM::S30;
- }
- break;
- case 31:
- switch (RegClassID) {
- case ARM::DPRRegClassID: return ARM::D31;
- case ARM::SPRRegClassID: return ARM::S31;
- }
- break;
- }
- DEBUG(errs() << "Invalid (RegClassID, RawRegister) combination\n");
- // Encoding error. Mark the builder with error code != 0.
- B->SetErr(-1);
- return 0;
-}
-
-///////////////////////////////
-// //
-// Utility Functions //
-// //
-///////////////////////////////
-
-// Extract/Decode Rd: Inst{15-12}.
-static inline unsigned decodeRd(uint32_t insn) {
- return (insn >> ARMII::RegRdShift) & ARMII::GPRRegMask;
-}
-
-// Extract/Decode Rn: Inst{19-16}.
-static inline unsigned decodeRn(uint32_t insn) {
- return (insn >> ARMII::RegRnShift) & ARMII::GPRRegMask;
-}
-
-// Extract/Decode Rm: Inst{3-0}.
-static inline unsigned decodeRm(uint32_t insn) {
- return (insn & ARMII::GPRRegMask);
-}
-
-// Extract/Decode Rs: Inst{11-8}.
-static inline unsigned decodeRs(uint32_t insn) {
- return (insn >> ARMII::RegRsShift) & ARMII::GPRRegMask;
-}
-
-static inline unsigned getCondField(uint32_t insn) {
- return (insn >> ARMII::CondShift);
-}
-
-static inline unsigned getIBit(uint32_t insn) {
- return (insn >> ARMII::I_BitShift) & 1;
-}
-
-static inline unsigned getAM3IBit(uint32_t insn) {
- return (insn >> ARMII::AM3_I_BitShift) & 1;
-}
-
-static inline unsigned getPBit(uint32_t insn) {
- return (insn >> ARMII::P_BitShift) & 1;
-}
-
-static inline unsigned getUBit(uint32_t insn) {
- return (insn >> ARMII::U_BitShift) & 1;
-}
-
-static inline unsigned getPUBits(uint32_t insn) {
- return (insn >> ARMII::U_BitShift) & 3;
-}
-
-static inline unsigned getSBit(uint32_t insn) {
- return (insn >> ARMII::S_BitShift) & 1;
-}
-
-static inline unsigned getWBit(uint32_t insn) {
- return (insn >> ARMII::W_BitShift) & 1;
-}
-
-static inline unsigned getDBit(uint32_t insn) {
- return (insn >> ARMII::D_BitShift) & 1;
-}
-
-static inline unsigned getNBit(uint32_t insn) {
- return (insn >> ARMII::N_BitShift) & 1;
-}
-
-static inline unsigned getMBit(uint32_t insn) {
- return (insn >> ARMII::M_BitShift) & 1;
-}
-
-// See A8.4 Shifts applied to a register.
-// A8.4.2 Register controlled shifts.
-//
-// getShiftOpcForBits - getShiftOpcForBits translates from the ARM encoding bits
-// into llvm enums for shift opcode. The API clients should pass in the value
-// encoded with two bits, so the assert stays to signal a wrong API usage.
-//
-// A8-12: DecodeRegShift()
-static inline ARM_AM::ShiftOpc getShiftOpcForBits(unsigned bits) {
- switch (bits) {
- default: assert(0 && "No such value"); return ARM_AM::no_shift;
- case 0: return ARM_AM::lsl;
- case 1: return ARM_AM::lsr;
- case 2: return ARM_AM::asr;
- case 3: return ARM_AM::ror;
- }
-}
-
-// See A8.4 Shifts applied to a register.
-// A8.4.1 Constant shifts.
-//
-// getImmShiftSE - getImmShiftSE translates from the raw ShiftOpc and raw Imm5
-// encodings into the intended ShiftOpc and shift amount.
-//
-// A8-11: DecodeImmShift()
-static inline void getImmShiftSE(ARM_AM::ShiftOpc &ShOp, unsigned &ShImm) {
- if (ShImm != 0)
- return;
- switch (ShOp) {
- case ARM_AM::no_shift:
- case ARM_AM::rrx:
- break;
- case ARM_AM::lsl:
- ShOp = ARM_AM::no_shift;
- break;
- case ARM_AM::lsr:
- case ARM_AM::asr:
- ShImm = 32;
- break;
- case ARM_AM::ror:
- ShOp = ARM_AM::rrx;
- break;
- }
-}
-
-// getAMSubModeForBits - getAMSubModeForBits translates from the ARM encoding
-// bits Inst{24-23} (P(24) and U(23)) into llvm enums for AMSubMode. The API
-// clients should pass in the value encoded with two bits, so the assert stays
-// to signal a wrong API usage.
-static inline ARM_AM::AMSubMode getAMSubModeForBits(unsigned bits) {
- switch (bits) {
- default: assert(0 && "No such value"); return ARM_AM::bad_am_submode;
- case 1: return ARM_AM::ia; // P=0 U=1
- case 3: return ARM_AM::ib; // P=1 U=1
- case 0: return ARM_AM::da; // P=0 U=0
- case 2: return ARM_AM::db; // P=1 U=0
- }
-}
-
-////////////////////////////////////////////
-// //
-// Disassemble function definitions //
-// //
-////////////////////////////////////////////
-
-/// There is a separate Disassemble*Frm function entry for disassembly of an ARM
-/// instr into a list of MCOperands in the appropriate order, with possible dst,
-/// followed by possible src(s).
-///
-/// The processing of the predicate, and the 'S' modifier bit, if MI modifies
-/// the CPSR, is factored into ARMBasicMCBuilder's method named
-/// TryPredicateAndSBitModifier.
-
-static bool DisassemblePseudo(MCInst &MI, unsigned Opcode, uint32_t insn,
- unsigned short NumOps, unsigned &NumOpsAdded, BO) {
-
- assert(0 && "Unexpected pseudo instruction!");
- return false;
-}
-
-// A8.6.94 MLA
-// if d == 15 || n == 15 || m == 15 || a == 15 then UNPREDICTABLE;
-//
-// A8.6.105 MUL
-// if d == 15 || n == 15 || m == 15 then UNPREDICTABLE;
-//
-// A8.6.246 UMULL
-// if dLo == 15 || dHi == 15 || n == 15 || m == 15 then UNPREDICTABLE;
-// if dHi == dLo then UNPREDICTABLE;
-static bool BadRegsMulFrm(unsigned Opcode, uint32_t insn) {
- unsigned R19_16 = slice(insn, 19, 16);
- unsigned R15_12 = slice(insn, 15, 12);
- unsigned R11_8 = slice(insn, 11, 8);
- unsigned R3_0 = slice(insn, 3, 0);
- switch (Opcode) {
- default:
- // Did we miss an opcode?
- DEBUG(errs() << "BadRegsMulFrm: unexpected opcode!");
- return false;
- case ARM::MLA: case ARM::MLS: case ARM::SMLABB: case ARM::SMLABT:
- case ARM::SMLATB: case ARM::SMLATT: case ARM::SMLAWB: case ARM::SMLAWT:
- case ARM::SMMLA: case ARM::SMMLAR: case ARM::SMMLS: case ARM::SMMLSR:
- case ARM::USADA8:
- if (R19_16 == 15 || R15_12 == 15 || R11_8 == 15 || R3_0 == 15)
- return true;
- return false;
- case ARM::MUL: case ARM::SMMUL: case ARM::SMMULR:
- case ARM::SMULBB: case ARM::SMULBT: case ARM::SMULTB: case ARM::SMULTT:
- case ARM::SMULWB: case ARM::SMULWT: case ARM::SMUAD: case ARM::SMUADX:
- // A8.6.167 SMLAD & A8.6.172 SMLSD
- case ARM::SMLAD: case ARM::SMLADX: case ARM::SMLSD: case ARM::SMLSDX:
- case ARM::USAD8:
- if (R19_16 == 15 || R11_8 == 15 || R3_0 == 15)
- return true;
- return false;
- case ARM::SMLAL: case ARM::SMULL: case ARM::UMAAL: case ARM::UMLAL:
- case ARM::UMULL:
- case ARM::SMLALBB: case ARM::SMLALBT: case ARM::SMLALTB: case ARM::SMLALTT:
- case ARM::SMLALD: case ARM::SMLALDX: case ARM::SMLSLD: case ARM::SMLSLDX:
- if (R19_16 == 15 || R15_12 == 15 || R11_8 == 15 || R3_0 == 15)
- return true;
- if (R19_16 == R15_12)
- return true;
- return false;;
- }
-}
-
-// Multiply Instructions.
-// MLA, MLS, SMLABB, SMLABT, SMLATB, SMLATT, SMLAWB, SMLAWT, SMMLA, SMMLAR,
-// SMMLS, SMMLAR, SMLAD, SMLADX, SMLSD, SMLSDX, and USADA8 (for convenience):
-// Rd{19-16} Rn{3-0} Rm{11-8} Ra{15-12}
-// But note that register checking for {SMLAD, SMLADX, SMLSD, SMLSDX} is
-// only for {d, n, m}.
-//
-// MUL, SMMUL, SMMULR, SMULBB, SMULBT, SMULTB, SMULTT, SMULWB, SMULWT, SMUAD,
-// SMUADX, and USAD8 (for convenience):
-// Rd{19-16} Rn{3-0} Rm{11-8}
-//
-// SMLAL, SMULL, UMAAL, UMLAL, UMULL, SMLALBB, SMLALBT, SMLALTB, SMLALTT,
-// SMLALD, SMLADLX, SMLSLD, SMLSLDX:
-// RdLo{15-12} RdHi{19-16} Rn{3-0} Rm{11-8}
-//
-// The mapping of the multiply registers to the "regular" ARM registers, where
-// there are convenience decoder functions, is:
-//
-// Inst{15-12} => Rd
-// Inst{19-16} => Rn
-// Inst{3-0} => Rm
-// Inst{11-8} => Rs
-static bool DisassembleMulFrm(MCInst &MI, unsigned Opcode, uint32_t insn,
- unsigned short NumOps, unsigned &NumOpsAdded, BO B) {
-
- const MCInstrDesc &MCID = ARMInsts[Opcode];
- unsigned short NumDefs = MCID.getNumDefs();
- const MCOperandInfo *OpInfo = MCID.OpInfo;
- unsigned &OpIdx = NumOpsAdded;
-
- OpIdx = 0;
-
- assert(NumDefs > 0 && "NumDefs should be greater than 0 for MulFrm");
- assert(NumOps >= 3
- && OpInfo[0].RegClass == ARM::GPRRegClassID
- && OpInfo[1].RegClass == ARM::GPRRegClassID
- && OpInfo[2].RegClass == ARM::GPRRegClassID
- && "Expect three register operands");
-
- // Sanity check for the register encodings.
- if (BadRegsMulFrm(Opcode, insn))
- return false;
-
- // Instructions with two destination registers have RdLo{15-12} first.
- if (NumDefs == 2) {
- assert(NumOps >= 4 && OpInfo[3].RegClass == ARM::GPRRegClassID &&
- "Expect 4th register operand");
- MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID,
- decodeRd(insn))));
- ++OpIdx;
- }
-
- // The destination register: RdHi{19-16} or Rd{19-16}.
- MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID,
- decodeRn(insn))));
-
- // The two src regsiters: Rn{3-0}, then Rm{11-8}.
- MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID,
- decodeRm(insn))));
- MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID,
- decodeRs(insn))));
- OpIdx += 3;
-
- // Many multiply instructions (e.g., MLA) have three src registers.
- // The third register operand is Ra{15-12}.
- if (OpIdx < NumOps && OpInfo[OpIdx].RegClass == ARM::GPRRegClassID) {
- MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID,
- decodeRd(insn))));
- ++OpIdx;
- }
-
- return true;
-}
-
-// Helper routines for disassembly of coprocessor instructions.
-
-static bool LdStCopOpcode(unsigned Opcode) {
- if ((Opcode >= ARM::LDC2L_OFFSET && Opcode <= ARM::LDC_PRE) ||
- (Opcode >= ARM::STC2L_OFFSET && Opcode <= ARM::STC_PRE))
- return true;
- return false;
-}
-static bool CoprocessorOpcode(unsigned Opcode) {
- if (LdStCopOpcode(Opcode))
- return true;
-
- switch (Opcode) {
- default:
- return false;
- case ARM::CDP: case ARM::CDP2:
- case ARM::MCR: case ARM::MCR2: case ARM::MRC: case ARM::MRC2:
- case ARM::MCRR: case ARM::MCRR2: case ARM::MRRC: case ARM::MRRC2:
- return true;
- }
-}
-static inline unsigned GetCoprocessor(uint32_t insn) {
- return slice(insn, 11, 8);
-}
-static inline unsigned GetCopOpc1(uint32_t insn, bool CDP) {
- return CDP ? slice(insn, 23, 20) : slice(insn, 23, 21);
-}
-static inline unsigned GetCopOpc2(uint32_t insn) {
- return slice(insn, 7, 5);
-}
-static inline unsigned GetCopOpc(uint32_t insn) {
- return slice(insn, 7, 4);
-}
-// Most of the operands are in immediate forms, except Rd and Rn, which are ARM
-// core registers.
-//
-// CDP, CDP2: cop opc1 CRd CRn CRm opc2
-//
-// MCR, MCR2, MRC, MRC2: cop opc1 Rd CRn CRm opc2
-//
-// MCRR, MCRR2, MRRC, MRRc2: cop opc Rd Rn CRm
-//
-// LDC_OFFSET, LDC_PRE, LDC_POST: cop CRd Rn R0 [+/-]imm8:00
-// and friends
-// STC_OFFSET, STC_PRE, STC_POST: cop CRd Rn R0 [+/-]imm8:00
-// and friends
-// <-- addrmode2 -->
-//
-// LDC_OPTION: cop CRd Rn imm8
-// and friends
-// STC_OPTION: cop CRd Rn imm8
-// and friends
-//
-static bool DisassembleCoprocessor(MCInst &MI, unsigned Opcode, uint32_t insn,
- unsigned short NumOps, unsigned &NumOpsAdded, BO B) {
-
- assert(NumOps >= 4 && "Num of operands >= 4 for coprocessor instr");
-
- unsigned &OpIdx = NumOpsAdded;
- // A8.6.92
- // if coproc == '101x' then SEE "Advanced SIMD and VFP"
- // But since the special instructions have more explicit encoding bits
- // specified, if coproc == 10 or 11, we should reject it as invalid.
- unsigned coproc = GetCoprocessor(insn);
- if ((Opcode == ARM::MCR || Opcode == ARM::MCRR ||
- Opcode == ARM::MRC || Opcode == ARM::MRRC) &&
- (coproc == 10 || coproc == 11)) {
- DEBUG(errs() << "Encoding error: coproc == 10 or 11 for MCR[R]/MR[R]C\n");
- return false;
- }
-
- bool OneCopOpc = (Opcode == ARM::MCRR || Opcode == ARM::MCRR2 ||
- Opcode == ARM::MRRC || Opcode == ARM::MRRC2);
-
- // CDP/CDP2 has no GPR operand; the opc1 operand is also wider (Inst{23-20}).
- bool NoGPR = (Opcode == ARM::CDP || Opcode == ARM::CDP2);
- bool LdStCop = LdStCopOpcode(Opcode);
- bool RtOut = (Opcode == ARM::MRC || Opcode == ARM::MRC2);
-
- OpIdx = 0;
-
- if (RtOut) {
- MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID,
- decodeRd(insn))));
- ++OpIdx;
- }
- MI.addOperand(MCOperand::CreateImm(coproc));
- ++OpIdx;
-
- if (LdStCop) {
- // Unindex if P:W = 0b00 --> _OPTION variant
- unsigned PW = getPBit(insn) << 1 | getWBit(insn);
-
- MI.addOperand(MCOperand::CreateImm(decodeRd(insn)));
-
- MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID,
- decodeRn(insn))));
- OpIdx += 2;
-
- if (PW) {
- MI.addOperand(MCOperand::CreateReg(0));
- ARM_AM::AddrOpc AddrOpcode = getUBit(insn) ? ARM_AM::add : ARM_AM::sub;
- const MCInstrDesc &MCID = ARMInsts[Opcode];
- unsigned IndexMode =
- (MCID.TSFlags & ARMII::IndexModeMask) >> ARMII::IndexModeShift;
- unsigned Offset = ARM_AM::getAM2Opc(AddrOpcode, slice(insn, 7, 0) << 2,
- ARM_AM::no_shift, IndexMode);
- MI.addOperand(MCOperand::CreateImm(Offset));
- OpIdx += 2;
- } else {
- MI.addOperand(MCOperand::CreateImm(slice(insn, 7, 0)));
- ++OpIdx;
- }
- } else {
- MI.addOperand(MCOperand::CreateImm(OneCopOpc ? GetCopOpc(insn)
- : GetCopOpc1(insn, NoGPR)));
- ++OpIdx;
-
- if (!RtOut) {
- MI.addOperand(NoGPR ? MCOperand::CreateImm(decodeRd(insn))
- : MCOperand::CreateReg(
- getRegisterEnum(B, ARM::GPRRegClassID,
- decodeRd(insn))));
- ++OpIdx;
- }
-
- MI.addOperand(OneCopOpc ? MCOperand::CreateReg(
- getRegisterEnum(B, ARM::GPRRegClassID,
- decodeRn(insn)))
- : MCOperand::CreateImm(decodeRn(insn)));
-
- MI.addOperand(MCOperand::CreateImm(decodeRm(insn)));
-
- OpIdx += 2;
-
- if (!OneCopOpc) {
- MI.addOperand(MCOperand::CreateImm(GetCopOpc2(insn)));
- ++OpIdx;
- }
- }
-
- return true;
-}
-
-// Branch Instructions.
-// BL: SignExtend(Imm24:'00', 32)
-// Bcc, BL_pred: SignExtend(Imm24:'00', 32) Pred0 Pred1
-// SMC: ZeroExtend(imm4, 32)
-// SVC: ZeroExtend(Imm24, 32)
-//
-// Various coprocessor instructions are assigned BrFrm arbitrarily.
-// Delegates to DisassembleCoprocessor() helper function.
-//
-// MRS/MRSsys: Rd
-// MSR/MSRsys: Rm mask=Inst{19-16}
-// BXJ: Rm
-// MSRi/MSRsysi: so_imm
-// SRSW/SRS: ldstm_mode:$amode mode_imm
-// RFEW/RFE: ldstm_mode:$amode Rn
-static bool DisassembleBrFrm(MCInst &MI, unsigned Opcode, uint32_t insn,
- unsigned short NumOps, unsigned &NumOpsAdded, BO B) {
-
- if (CoprocessorOpcode(Opcode))
- return DisassembleCoprocessor(MI, Opcode, insn, NumOps, NumOpsAdded, B);
-
- const MCOperandInfo *OpInfo = ARMInsts[Opcode].OpInfo;
- if (!OpInfo) return false;
-
- // MRS and MRSsys take one GPR reg Rd.
- if (Opcode == ARM::MRS || Opcode == ARM::MRSsys) {
- assert(NumOps >= 1 && OpInfo[0].RegClass == ARM::GPRRegClassID &&
- "Reg operand expected");
- MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID,
- decodeRd(insn))));
- NumOpsAdded = 1;
- return true;
- }
- // BXJ takes one GPR reg Rm.
- if (Opcode == ARM::BXJ) {
- assert(NumOps >= 1 && OpInfo[0].RegClass == ARM::GPRRegClassID &&
- "Reg operand expected");
- MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID,
- decodeRm(insn))));
- NumOpsAdded = 1;
- return true;
- }
- // MSR take a mask, followed by one GPR reg Rm. The mask contains the R Bit in
- // bit 4, and the special register fields in bits 3-0.
- if (Opcode == ARM::MSR) {
- assert(NumOps >= 1 && OpInfo[1].RegClass == ARM::GPRRegClassID &&
- "Reg operand expected");
- MI.addOperand(MCOperand::CreateImm(slice(insn, 22, 22) << 4 /* R Bit */ |
- slice(insn, 19, 16) /* Special Reg */ ));
- MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID,
- decodeRm(insn))));
- NumOpsAdded = 2;
- return true;
- }
- // MSRi take a mask, followed by one so_imm operand. The mask contains the
- // R Bit in bit 4, and the special register fields in bits 3-0.
- if (Opcode == ARM::MSRi) {
- // A5.2.11 MSR (immediate), and hints & B6.1.6 MSR (immediate)
- // The hints instructions have more specific encodings, so if mask == 0,
- // we should reject this as an invalid instruction.
- if (slice(insn, 19, 16) == 0)
- return false;
- MI.addOperand(MCOperand::CreateImm(slice(insn, 22, 22) << 4 /* R Bit */ |
- slice(insn, 19, 16) /* Special Reg */ ));
- // SOImm is 4-bit rotate amount in bits 11-8 with 8-bit imm in bits 7-0.
- // A5.2.4 Rotate amount is twice the numeric value of Inst{11-8}.
- // See also ARMAddressingModes.h: getSOImmValImm() and getSOImmValRot().
- unsigned Rot = (insn >> ARMII::SoRotImmShift) & 0xF;
- unsigned Imm = insn & 0xFF;
- MI.addOperand(MCOperand::CreateImm(ARM_AM::rotr32(Imm, 2*Rot)));
- NumOpsAdded = 2;
- return true;
- }
- if (Opcode == ARM::SRSW || Opcode == ARM::SRS ||
- Opcode == ARM::RFEW || Opcode == ARM::RFE) {
- ARM_AM::AMSubMode SubMode = getAMSubModeForBits(getPUBits(insn));
- MI.addOperand(MCOperand::CreateImm(ARM_AM::getAM4ModeImm(SubMode)));
-
- if (Opcode == ARM::SRSW || Opcode == ARM::SRS)
- MI.addOperand(MCOperand::CreateImm(slice(insn, 4, 0)));
- else
- MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID,
- decodeRn(insn))));
- NumOpsAdded = 3;
- return true;
- }
-
- assert((Opcode == ARM::Bcc || Opcode == ARM::BL || Opcode == ARM::BL_pred
- || Opcode == ARM::SMC || Opcode == ARM::SVC) &&
- "Unexpected Opcode");
-
- assert(NumOps >= 1 && OpInfo[0].RegClass < 0 && "Imm operand expected");
-
- int Imm32 = 0;
- if (Opcode == ARM::SMC) {
- // ZeroExtend(imm4, 32) where imm24 = Inst{3-0}.
- Imm32 = slice(insn, 3, 0);
- } else if (Opcode == ARM::SVC) {
- // ZeroExtend(imm24, 32) where imm24 = Inst{23-0}.
- Imm32 = slice(insn, 23, 0);
- } else {
- // SignExtend(imm24:'00', 32) where imm24 = Inst{23-0}.
- unsigned Imm26 = slice(insn, 23, 0) << 2;
- //Imm32 = signextend<signed int, 26>(Imm26);
- Imm32 = SignExtend32<26>(Imm26);
- }
-
- MI.addOperand(MCOperand::CreateImm(Imm32));
- NumOpsAdded = 1;
-
- return true;
-}
-
-// Misc. Branch Instructions.
-// BX_RET, MOVPCLR
-// BLX, BLX_pred, BX, BX_pred
-// BLXi
-static bool DisassembleBrMiscFrm(MCInst &MI, unsigned Opcode, uint32_t insn,
- unsigned short NumOps, unsigned &NumOpsAdded, BO B) {
-
- const MCOperandInfo *OpInfo = ARMInsts[Opcode].OpInfo;
- if (!OpInfo) return false;
-
- unsigned &OpIdx = NumOpsAdded;
-
- OpIdx = 0;
-
- // BX_RET and MOVPCLR have only two predicate operands; do an early return.
- if (Opcode == ARM::BX_RET || Opcode == ARM::MOVPCLR)
- return true;
-
- // BLX and BX take one GPR reg.
- if (Opcode == ARM::BLX || Opcode == ARM::BLX_pred ||
- Opcode == ARM::BX || Opcode == ARM::BX_pred) {
- assert(NumOps >= 1 && OpInfo[OpIdx].RegClass == ARM::GPRRegClassID &&
- "Reg operand expected");
- MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID,
- decodeRm(insn))));
- OpIdx = 1;
- return true;
- }
-
- // BLXi takes imm32 (the PC offset).
- if (Opcode == ARM::BLXi) {
- assert(NumOps >= 1 && OpInfo[0].RegClass < 0 && "Imm operand expected");
- // SignExtend(imm24:H:'0', 32) where imm24 = Inst{23-0} and H = Inst{24}.
- unsigned Imm26 = slice(insn, 23, 0) << 2 | slice(insn, 24, 24) << 1;
- int Imm32 = SignExtend32<26>(Imm26);
- MI.addOperand(MCOperand::CreateImm(Imm32));
- OpIdx = 1;
- return true;
- }
-
- return false;
-}
-
-static inline bool getBFCInvMask(uint32_t insn, uint32_t &mask) {
- uint32_t lsb = slice(insn, 11, 7);
- uint32_t msb = slice(insn, 20, 16);
- uint32_t Val = 0;
- if (msb < lsb) {
- DEBUG(errs() << "Encoding error: msb < lsb\n");
- return false;
- }
-
- for (uint32_t i = lsb; i <= msb; ++i)
- Val |= (1 << i);
- mask = ~Val;
- return true;
-}
-
-// Standard data-processing instructions allow PC as a register specifier,
-// but we should reject other DPFrm instructions with PC as registers.
-static bool BadRegsDPFrm(unsigned Opcode, uint32_t insn) {
- switch (Opcode) {
- default:
- // Did we miss an opcode?
- if (decodeRd(insn) == 15 || decodeRn(insn) == 15 || decodeRm(insn) == 15) {
- DEBUG(errs() << "DPFrm with bad reg specifier(s)\n");
- return true;
- }
- case ARM::ADCrr: case ARM::ADDSrr: case ARM::ADDrr: case ARM::ANDrr:
- case ARM::BICrr: case ARM::CMNzrr: case ARM::CMPrr: case ARM::EORrr:
- case ARM::ORRrr: case ARM::RSBrr: case ARM::RSCrr: case ARM::SBCrr:
- case ARM::SUBSrr: case ARM::SUBrr: case ARM::TEQrr: case ARM::TSTrr:
- return false;
- }
-}
-
-// A major complication is the fact that some of the saturating add/subtract
-// operations have Rd Rm Rn, instead of the "normal" Rd Rn Rm.
-// They are QADD, QDADD, QDSUB, and QSUB.
-static bool DisassembleDPFrm(MCInst &MI, unsigned Opcode, uint32_t insn,
- unsigned short NumOps, unsigned &NumOpsAdded, BO B) {
-
- const MCInstrDesc &MCID = ARMInsts[Opcode];
- unsigned short NumDefs = MCID.getNumDefs();
- bool isUnary = isUnaryDP(MCID.TSFlags);
- const MCOperandInfo *OpInfo = MCID.OpInfo;
- unsigned &OpIdx = NumOpsAdded;
-
- OpIdx = 0;
-
- // Disassemble register def if there is one.
- if (NumDefs && (OpInfo[OpIdx].RegClass == ARM::GPRRegClassID)) {
- MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID,
- decodeRd(insn))));
- ++OpIdx;
- }
-
- // Now disassemble the src operands.
- if (OpIdx >= NumOps)
- return false;
-
- // Special-case handling of BFC/BFI/SBFX/UBFX.
- if (Opcode == ARM::BFC || Opcode == ARM::BFI) {
- // A8.6.17 BFC & A8.6.18 BFI
- // Sanity check Rd.
- if (decodeRd(insn) == 15)
- return false;
- MI.addOperand(MCOperand::CreateReg(0));
- if (Opcode == ARM::BFI) {
- MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID,
- decodeRm(insn))));
- ++OpIdx;
- }
- uint32_t mask = 0;
- if (!getBFCInvMask(insn, mask))
- return false;
-
- MI.addOperand(MCOperand::CreateImm(mask));
- OpIdx += 2;
- return true;
- }
- if (Opcode == ARM::SBFX || Opcode == ARM::UBFX) {
- // Sanity check Rd and Rm.
- if (decodeRd(insn) == 15 || decodeRm(insn) == 15)
- return false;
- MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID,
- decodeRm(insn))));
- MI.addOperand(MCOperand::CreateImm(slice(insn, 11, 7)));
- MI.addOperand(MCOperand::CreateImm(slice(insn, 20, 16) + 1));
- OpIdx += 3;
- return true;
- }
-
- bool RmRn = (Opcode == ARM::QADD || Opcode == ARM::QDADD ||
- Opcode == ARM::QDSUB || Opcode == ARM::QSUB);
-
- // BinaryDP has an Rn operand.
- if (!isUnary) {
- assert(OpInfo[OpIdx].RegClass == ARM::GPRRegClassID &&
- "Reg operand expected");
- MI.addOperand(MCOperand::CreateReg(
- getRegisterEnum(B, ARM::GPRRegClassID,
- RmRn ? decodeRm(insn) : decodeRn(insn))));
- ++OpIdx;
- }
-
- // If this is a two-address operand, skip it, e.g., MOVCCr operand 1.
- if (isUnary && (MCID.getOperandConstraint(OpIdx, MCOI::TIED_TO) != -1)) {
- MI.addOperand(MCOperand::CreateReg(0));
- ++OpIdx;
- }
-
- // Now disassemble operand 2.
- if (OpIdx >= NumOps)
- return false;
-
- if (OpInfo[OpIdx].RegClass == ARM::GPRRegClassID) {
- // We have a reg/reg form.
- // Assert disabled because saturating operations, e.g., A8.6.127 QASX, are
- // routed here as well.
- // assert(getIBit(insn) == 0 && "I_Bit != '0' reg/reg form");
- if (BadRegsDPFrm(Opcode, insn))
- return false;
- MI.addOperand(MCOperand::CreateReg(
- getRegisterEnum(B, ARM::GPRRegClassID,
- RmRn? decodeRn(insn) : decodeRm(insn))));
- ++OpIdx;
- } else if (Opcode == ARM::MOVi16 || Opcode == ARM::MOVTi16) {
- // These two instructions don't allow d as 15.
- if (decodeRd(insn) == 15)
- return false;
- // We have an imm16 = imm4:imm12 (imm4=Inst{19:16}, imm12 = Inst{11:0}).
- assert(getIBit(insn) == 1 && "I_Bit != '1' reg/imm form");
- unsigned Imm16 = slice(insn, 19, 16) << 12 | slice(insn, 11, 0);
- if (!B->tryAddingSymbolicOperand(Imm16, 4, MI))
- MI.addOperand(MCOperand::CreateImm(Imm16));
- ++OpIdx;
- } else {
- // We have a reg/imm form.
- // SOImm is 4-bit rotate amount in bits 11-8 with 8-bit imm in bits 7-0.
- // A5.2.4 Rotate amount is twice the numeric value of Inst{11-8}.
- // See also ARMAddressingModes.h: getSOImmValImm() and getSOImmValRot().
- assert(getIBit(insn) == 1 && "I_Bit != '1' reg/imm form");
- unsigned Rot = (insn >> ARMII::SoRotImmShift) & 0xF;
- unsigned Imm = insn & 0xFF;
- MI.addOperand(MCOperand::CreateImm(ARM_AM::rotr32(Imm, 2*Rot)));
- ++OpIdx;
- }
-
- return true;
-}
-
-static bool DisassembleDPSoRegFrm(MCInst &MI, unsigned Opcode, uint32_t insn,
- unsigned short NumOps, unsigned &NumOpsAdded, BO B) {
-
- const MCInstrDesc &MCID = ARMInsts[Opcode];
- unsigned short NumDefs = MCID.getNumDefs();
- bool isUnary = isUnaryDP(MCID.TSFlags);
- const MCOperandInfo *OpInfo = MCID.OpInfo;
- unsigned &OpIdx = NumOpsAdded;
-
- OpIdx = 0;
-
- // Disassemble register def if there is one.
- if (NumDefs && (OpInfo[OpIdx].RegClass == ARM::GPRRegClassID)) {
- MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID,
- decodeRd(insn))));
- ++OpIdx;
- }
-
- // Disassemble the src operands.
- if (OpIdx >= NumOps)
- return false;
-
- // BinaryDP has an Rn operand.
- if (!isUnary) {
- assert(OpInfo[OpIdx].RegClass == ARM::GPRRegClassID &&
- "Reg operand expected");
- MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID,
- decodeRn(insn))));
- ++OpIdx;
- }
-
- // If this is a two-address operand, skip it, e.g., MOVCCs operand 1.
- if (isUnary && (MCID.getOperandConstraint(OpIdx, MCOI::TIED_TO) != -1)) {
- MI.addOperand(MCOperand::CreateReg(0));
- ++OpIdx;
- }
-
- // Disassemble operand 2, which consists of three components.
- if (OpIdx + 2 >= NumOps)
- return false;
-
- assert((OpInfo[OpIdx].RegClass == ARM::GPRRegClassID) &&
- (OpInfo[OpIdx+1].RegClass == ARM::GPRRegClassID) &&
- (OpInfo[OpIdx+2].RegClass < 0) &&
- "Expect 3 reg operands");
-
- // Register-controlled shifts have Inst{7} = 0 and Inst{4} = 1.
- unsigned Rs = slice(insn, 4, 4);
-
- MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID,
- decodeRm(insn))));
- if (Rs) {
- // If Inst{7} != 0, we should reject this insn as an invalid encoding.
- if (slice(insn, 7, 7))
- return false;
-
- // A8.6.3 ADC (register-shifted register)
- // if d == 15 || n == 15 || m == 15 || s == 15 then UNPREDICTABLE;
- //
- // This also accounts for shift instructions (register) where, fortunately,
- // Inst{19-16} = 0b0000.
- // A8.6.89 LSL (register)
- // if d == 15 || n == 15 || m == 15 then UNPREDICTABLE;
- if (decodeRd(insn) == 15 || decodeRn(insn) == 15 ||
- decodeRm(insn) == 15 || decodeRs(insn) == 15)
- return false;
-
- // Register-controlled shifts: [Rm, Rs, shift].
- MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID,
- decodeRs(insn))));
- // Inst{6-5} encodes the shift opcode.
- ARM_AM::ShiftOpc ShOp = getShiftOpcForBits(slice(insn, 6, 5));
- MI.addOperand(MCOperand::CreateImm(ARM_AM::getSORegOpc(ShOp, 0)));
- } else {
- // Constant shifts: [Rm, reg0, shift_imm].
- MI.addOperand(MCOperand::CreateReg(0)); // NoRegister
- // Inst{6-5} encodes the shift opcode.
- ARM_AM::ShiftOpc ShOp = getShiftOpcForBits(slice(insn, 6, 5));
- // Inst{11-7} encodes the imm5 shift amount.
- unsigned ShImm = slice(insn, 11, 7);
-
- // A8.4.1. Possible rrx or shift amount of 32...
- getImmShiftSE(ShOp, ShImm);
- MI.addOperand(MCOperand::CreateImm(ARM_AM::getSORegOpc(ShOp, ShImm)));
- }
- OpIdx += 3;
-
- return true;
-}
-
-static bool BadRegsLdStFrm(unsigned Opcode, uint32_t insn, bool Store, bool WBack,
- bool Imm) {
- const StringRef Name = ARMInsts[Opcode].Name;
- unsigned Rt = decodeRd(insn);
- unsigned Rn = decodeRn(insn);
- unsigned Rm = decodeRm(insn);
- unsigned P = getPBit(insn);
- unsigned W = getWBit(insn);
-
- if (Store) {
- // Only STR (immediate, register) allows PC as the source.
- if (Name.startswith("STRB") && Rt == 15) {
- DEBUG(errs() << "if t == 15 then UNPREDICTABLE\n");
- return true;
- }
- if (WBack && (Rn == 15 || Rn == Rt)) {
- DEBUG(errs() << "if wback && (n == 15 || n == t) then UNPREDICTABLE\n");
- return true;
- }
- if (!Imm && Rm == 15) {
- DEBUG(errs() << "if m == 15 then UNPREDICTABLE\n");
- return true;
- }
- } else {
- // Only LDR (immediate, register) allows PC as the destination.
- if (Name.startswith("LDRB") && Rt == 15) {
- DEBUG(errs() << "if t == 15 then UNPREDICTABLE\n");
- return true;
- }
- if (Imm) {
- // Immediate
- if (Rn == 15) {
- // The literal form must be in offset mode; it's an encoding error
- // otherwise.
- if (!(P == 1 && W == 0)) {
- DEBUG(errs() << "Ld literal form with !(P == 1 && W == 0)\n");
- return true;
- }
- // LDRB (literal) does not allow PC as the destination.
- if (Opcode != ARM::LDRi12 && Rt == 15) {
- DEBUG(errs() << "if t == 15 then UNPREDICTABLE\n");
- return true;
- }
- } else {
- // Write back while Rn == Rt does not make sense.
- if (WBack && (Rn == Rt)) {
- DEBUG(errs() << "if wback && n == t then UNPREDICTABLE\n");
- return true;
- }
- }
- } else {
- // Register
- if (Rm == 15) {
- DEBUG(errs() << "if m == 15 then UNPREDICTABLE\n");
- return true;
- }
- if (WBack && (Rn == 15 || Rn == Rt)) {
- DEBUG(errs() << "if wback && (n == 15 || n == t) then UNPREDICTABLE\n");
- return true;
- }
- }
- }
- return false;
-}
-
-static bool DisassembleLdStFrm(MCInst &MI, unsigned Opcode, uint32_t insn,
- unsigned short NumOps, unsigned &NumOpsAdded, bool isStore, BO B) {
-
- const MCInstrDesc &MCID = ARMInsts[Opcode];
- bool isPrePost = isPrePostLdSt(MCID.TSFlags);
- const MCOperandInfo *OpInfo = MCID.OpInfo;
- if (!OpInfo) return false;
-
- unsigned &OpIdx = NumOpsAdded;
-
- OpIdx = 0;
-
- assert(((!isStore && MCID.getNumDefs() > 0) ||
- (isStore && (MCID.getNumDefs() == 0 || isPrePost)))
- && "Invalid arguments");
-
- // Operand 0 of a pre- and post-indexed store is the address base writeback.
- if (isPrePost && isStore) {
- assert(OpInfo[OpIdx].RegClass == ARM::GPRRegClassID &&
- "Reg operand expected");
- MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID,
- decodeRn(insn))));
- ++OpIdx;
- }
-
- // Disassemble the dst/src operand.
- if (OpIdx >= NumOps)
- return false;
-
- assert(OpInfo[OpIdx].RegClass == ARM::GPRRegClassID &&
- "Reg operand expected");
- MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID,
- decodeRd(insn))));
- ++OpIdx;
-
- // After dst of a pre- and post-indexed load is the address base writeback.
- if (isPrePost && !isStore) {
- assert(OpInfo[OpIdx].RegClass == ARM::GPRRegClassID &&
- "Reg operand expected");
- MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID,
- decodeRn(insn))));
- ++OpIdx;
- }
-
- // Disassemble the base operand.
- if (OpIdx >= NumOps)
- return false;
-
- assert(OpInfo[OpIdx].RegClass == ARM::GPRRegClassID &&
- "Reg operand expected");
- assert((!isPrePost || (MCID.getOperandConstraint(OpIdx, MCOI::TIED_TO) != -1))
- && "Index mode or tied_to operand expected");
- MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID,
- decodeRn(insn))));
- ++OpIdx;
-
- // For reg/reg form, base reg is followed by +/- reg shop imm.
- // For immediate form, it is followed by +/- imm12.
- // See also ARMAddressingModes.h (Addressing Mode #2).
- if (OpIdx + 1 >= NumOps)
- return false;
-
- if (BadRegsLdStFrm(Opcode, insn, isStore, isPrePost, getIBit(insn)==0))
- return false;
-
- ARM_AM::AddrOpc AddrOpcode = getUBit(insn) ? ARM_AM::add : ARM_AM::sub;
- unsigned IndexMode =
- (MCID.TSFlags & ARMII::IndexModeMask) >> ARMII::IndexModeShift;
- if (getIBit(insn) == 0) {
- // For pre- and post-indexed case, add a reg0 operand (Addressing Mode #2).
- // Otherwise, skip the reg operand since for addrmode_imm12, Rn has already
- // been populated.
- if (isPrePost) {
- MI.addOperand(MCOperand::CreateReg(0));
- OpIdx += 1;
- }
-
- unsigned Imm12 = slice(insn, 11, 0);
- if (Opcode == ARM::LDRBi12 || Opcode == ARM::LDRi12 ||
- Opcode == ARM::STRBi12 || Opcode == ARM::STRi12) {
- // Disassemble the 12-bit immediate offset, which is the second operand in
- // $addrmode_imm12 => (ops GPR:$base, i32imm:$offsimm).
- int Offset = AddrOpcode == ARM_AM::add ? 1 * Imm12 : -1 * Imm12;
- MI.addOperand(MCOperand::CreateImm(Offset));
- } else {
- // Disassemble the 12-bit immediate offset, which is the second operand in
- // $am2offset => (ops GPR, i32imm).
- unsigned Offset = ARM_AM::getAM2Opc(AddrOpcode, Imm12, ARM_AM::no_shift,
- IndexMode);
- MI.addOperand(MCOperand::CreateImm(Offset));
- }
- OpIdx += 1;
- } else {
- // If Inst{25} = 1 and Inst{4} != 0, we should reject this as invalid.
- if (slice(insn,4,4) == 1)
- return false;
-
- // Disassemble the offset reg (Rm), shift type, and immediate shift length.
- MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID,
- decodeRm(insn))));
- // Inst{6-5} encodes the shift opcode.
- ARM_AM::ShiftOpc ShOp = getShiftOpcForBits(slice(insn, 6, 5));
- // Inst{11-7} encodes the imm5 shift amount.
- unsigned ShImm = slice(insn, 11, 7);
-
- // A8.4.1. Possible rrx or shift amount of 32...
- getImmShiftSE(ShOp, ShImm);
- MI.addOperand(MCOperand::CreateImm(
- ARM_AM::getAM2Opc(AddrOpcode, ShImm, ShOp, IndexMode)));
- OpIdx += 2;
- }
-
- return true;
-}
-
-static bool DisassembleLdFrm(MCInst &MI, unsigned Opcode, uint32_t insn,
- unsigned short NumOps, unsigned &NumOpsAdded, BO B) {
- return DisassembleLdStFrm(MI, Opcode, insn, NumOps, NumOpsAdded, false, B);
-}
-
-static bool DisassembleStFrm(MCInst &MI, unsigned Opcode, uint32_t insn,
- unsigned short NumOps, unsigned &NumOpsAdded, BO B) {
- return DisassembleLdStFrm(MI, Opcode, insn, NumOps, NumOpsAdded, true, B);
-}
-
-static bool HasDualReg(unsigned Opcode) {
- switch (Opcode) {
- default:
- return false;
- case ARM::LDRD: case ARM::LDRD_PRE: case ARM::LDRD_POST:
- case ARM::STRD: case ARM::STRD_PRE: case ARM::STRD_POST:
- return true;
- }
-}
-
-static bool DisassembleLdStMiscFrm(MCInst &MI, unsigned Opcode, uint32_t insn,
- unsigned short NumOps, unsigned &NumOpsAdded, bool isStore, BO B) {
-
- const MCInstrDesc &MCID = ARMInsts[Opcode];
- bool isPrePost = isPrePostLdSt(MCID.TSFlags);
- const MCOperandInfo *OpInfo = MCID.OpInfo;
- if (!OpInfo) return false;
-
- unsigned &OpIdx = NumOpsAdded;
-
- OpIdx = 0;
-
- assert(((!isStore && MCID.getNumDefs() > 0) ||
- (isStore && (MCID.getNumDefs() == 0 || isPrePost)))
- && "Invalid arguments");
-
- // Operand 0 of a pre- and post-indexed store is the address base writeback.
- if (isPrePost && isStore) {
- assert(OpInfo[OpIdx].RegClass == ARM::GPRRegClassID &&
- "Reg operand expected");
- MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID,
- decodeRn(insn))));
- ++OpIdx;
- }
-
- // Disassemble the dst/src operand.
- if (OpIdx >= NumOps)
- return false;
-
- assert(OpInfo[OpIdx].RegClass == ARM::GPRRegClassID &&
- "Reg operand expected");
- MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID,
- decodeRd(insn))));
- ++OpIdx;
-
- // Fill in LDRD and STRD's second operand Rt operand.
- if (HasDualReg(Opcode)) {
- MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID,
- decodeRd(insn) + 1)));
- ++OpIdx;
- }
-
- // After dst of a pre- and post-indexed load is the address base writeback.
- if (isPrePost && !isStore) {
- assert(OpInfo[OpIdx].RegClass == ARM::GPRRegClassID &&
- "Reg operand expected");
- MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID,
- decodeRn(insn))));
- ++OpIdx;
- }
-
- // Disassemble the base operand.
- if (OpIdx >= NumOps)
- return false;
-
- assert(OpInfo[OpIdx].RegClass == ARM::GPRRegClassID &&
- "Reg operand expected");
- assert((!isPrePost || (MCID.getOperandConstraint(OpIdx, MCOI::TIED_TO) != -1))
- && "Offset mode or tied_to operand expected");
- MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID,
- decodeRn(insn))));
- ++OpIdx;
-
- // For reg/reg form, base reg is followed by +/- reg.
- // For immediate form, it is followed by +/- imm8.
- // See also ARMAddressingModes.h (Addressing Mode #3).
- if (OpIdx + 1 >= NumOps)
- return false;
-
- assert((OpInfo[OpIdx].RegClass == ARM::GPRRegClassID) &&
- (OpInfo[OpIdx+1].RegClass < 0) &&
- "Expect 1 reg operand followed by 1 imm operand");
-
- ARM_AM::AddrOpc AddrOpcode = getUBit(insn) ? ARM_AM::add : ARM_AM::sub;
- unsigned IndexMode =
- (MCID.TSFlags & ARMII::IndexModeMask) >> ARMII::IndexModeShift;
- if (getAM3IBit(insn) == 1) {
- MI.addOperand(MCOperand::CreateReg(0));
-
- // Disassemble the 8-bit immediate offset.
- unsigned Imm4H = (insn >> ARMII::ImmHiShift) & 0xF;
- unsigned Imm4L = insn & 0xF;
- unsigned Offset = ARM_AM::getAM3Opc(AddrOpcode, (Imm4H << 4) | Imm4L,
- IndexMode);
- MI.addOperand(MCOperand::CreateImm(Offset));
- } else {
- // Disassemble the offset reg (Rm).
- MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID,
- decodeRm(insn))));
- unsigned Offset = ARM_AM::getAM3Opc(AddrOpcode, 0, IndexMode);
- MI.addOperand(MCOperand::CreateImm(Offset));
- }
- OpIdx += 2;
-
- return true;
-}
-
-static bool DisassembleLdMiscFrm(MCInst &MI, unsigned Opcode, uint32_t insn,
- unsigned short NumOps, unsigned &NumOpsAdded, BO B) {
- return DisassembleLdStMiscFrm(MI, Opcode, insn, NumOps, NumOpsAdded, false,
- B);
-}
-
-static bool DisassembleStMiscFrm(MCInst &MI, unsigned Opcode, uint32_t insn,
- unsigned short NumOps, unsigned &NumOpsAdded, BO B) {
- return DisassembleLdStMiscFrm(MI, Opcode, insn, NumOps, NumOpsAdded, true, B);
-}
-
-// The algorithm for disassembly of LdStMulFrm is different from others because
-// it explicitly populates the two predicate operands after the base register.
-// After that, we need to populate the reglist with each affected register
-// encoded as an MCOperand.
-static bool DisassembleLdStMulFrm(MCInst &MI, unsigned Opcode, uint32_t insn,
- unsigned short NumOps, unsigned &NumOpsAdded, BO B) {
-
- assert(NumOps >= 4 && "LdStMulFrm expects NumOps >= 4");
- NumOpsAdded = 0;
-
- unsigned Base = getRegisterEnum(B, ARM::GPRRegClassID, decodeRn(insn));
-
- // Writeback to base, if necessary.
- if (Opcode == ARM::LDMIA_UPD || Opcode == ARM::STMIA_UPD ||
- Opcode == ARM::LDMDA_UPD || Opcode == ARM::STMDA_UPD ||
- Opcode == ARM::LDMDB_UPD || Opcode == ARM::STMDB_UPD ||
- Opcode == ARM::LDMIB_UPD || Opcode == ARM::STMIB_UPD) {
- MI.addOperand(MCOperand::CreateReg(Base));
- ++NumOpsAdded;
- }
-
- // Add the base register operand.
- MI.addOperand(MCOperand::CreateReg(Base));
-
- // Handling the two predicate operands before the reglist.
- int64_t CondVal = getCondField(insn);
- if (CondVal == 0xF)
- return false;
- MI.addOperand(MCOperand::CreateImm(CondVal));
- MI.addOperand(MCOperand::CreateReg(ARM::CPSR));
-
- NumOpsAdded += 3;
-
- // Fill the variadic part of reglist.
- unsigned RegListBits = insn & ((1 << 16) - 1);
- for (unsigned i = 0; i < 16; ++i) {
- if ((RegListBits >> i) & 1) {
- MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID,
- i)));
- ++NumOpsAdded;
- }
- }
-
- return true;
-}
-
-// LDREX, LDREXB, LDREXH: Rd Rn
-// LDREXD: Rd Rd+1 Rn
-// STREX, STREXB, STREXH: Rd Rm Rn
-// STREXD: Rd Rm Rm+1 Rn
-//
-// SWP, SWPB: Rd Rm Rn
-static bool DisassembleLdStExFrm(MCInst &MI, unsigned Opcode, uint32_t insn,
- unsigned short NumOps, unsigned &NumOpsAdded, BO B) {
-
- const MCOperandInfo *OpInfo = ARMInsts[Opcode].OpInfo;
- if (!OpInfo) return false;
-
- unsigned &OpIdx = NumOpsAdded;
-
- OpIdx = 0;
-
- assert(NumOps >= 2
- && OpInfo[0].RegClass == ARM::GPRRegClassID
- && OpInfo[1].RegClass == ARM::GPRRegClassID
- && "Expect 2 reg operands");
-
- bool isStore = slice(insn, 20, 20) == 0;
- bool isDW = (Opcode == ARM::LDREXD || Opcode == ARM::STREXD);
-
- // Add the destination operand.
- MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID,
- decodeRd(insn))));
- ++OpIdx;
-
- // Store register Exclusive needs a source operand.
- if (isStore) {
- MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID,
- decodeRm(insn))));
- ++OpIdx;
-
- if (isDW) {
- MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID,
- decodeRm(insn)+1)));
- ++OpIdx;
- }
- } else if (isDW) {
- MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID,
- decodeRd(insn)+1)));
- ++OpIdx;
- }
-
- // Finally add the pointer operand.
- MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID,
- decodeRn(insn))));
- ++OpIdx;
-
- return true;
-}
-
-// Misc. Arithmetic Instructions.
-// CLZ: Rd Rm
-// PKHBT, PKHTB: Rd Rn Rm , LSL/ASR #imm5
-// RBIT, REV, REV16, REVSH: Rd Rm
-static bool DisassembleArithMiscFrm(MCInst &MI, unsigned Opcode, uint32_t insn,
- unsigned short NumOps, unsigned &NumOpsAdded, BO B) {
-
- const MCOperandInfo *OpInfo = ARMInsts[Opcode].OpInfo;
- unsigned &OpIdx = NumOpsAdded;
-
- OpIdx = 0;
-
- assert(NumOps >= 2
- && OpInfo[0].RegClass == ARM::GPRRegClassID
- && OpInfo[1].RegClass == ARM::GPRRegClassID
- && "Expect 2 reg operands");
-
- bool ThreeReg = NumOps > 2 && OpInfo[2].RegClass == ARM::GPRRegClassID;
-
- // Sanity check the registers, which should not be 15.
- if (decodeRd(insn) == 15 || decodeRm(insn) == 15)
- return false;
- if (ThreeReg && decodeRn(insn) == 15)
- return false;
-
- MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID,
- decodeRd(insn))));
- ++OpIdx;
-
- if (ThreeReg) {
- assert(NumOps >= 4 && "Expect >= 4 operands");
- MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID,
- decodeRn(insn))));
- ++OpIdx;
- }
-
- MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID,
- decodeRm(insn))));
- ++OpIdx;
-
- // If there is still an operand info left which is an immediate operand, add
- // an additional imm5 LSL/ASR operand.
- if (ThreeReg && OpInfo[OpIdx].RegClass < 0
- && !OpInfo[OpIdx].isPredicate() && !OpInfo[OpIdx].isOptionalDef()) {
- // Extract the 5-bit immediate field Inst{11-7}.
- unsigned ShiftAmt = (insn >> ARMII::ShiftShift) & 0x1F;
- ARM_AM::ShiftOpc Opc = ARM_AM::no_shift;
- if (Opcode == ARM::PKHBT)
- Opc = ARM_AM::lsl;
- else if (Opcode == ARM::PKHTB)
- Opc = ARM_AM::asr;
- getImmShiftSE(Opc, ShiftAmt);
- MI.addOperand(MCOperand::CreateImm(ARM_AM::getSORegOpc(Opc, ShiftAmt)));
- ++OpIdx;
- }
-
- return true;
-}
-
-/// DisassembleSatFrm - Disassemble saturate instructions:
-/// SSAT, SSAT16, USAT, and USAT16.
-static bool DisassembleSatFrm(MCInst &MI, unsigned Opcode, uint32_t insn,
- unsigned short NumOps, unsigned &NumOpsAdded, BO B) {
-
- // A8.6.183 SSAT
- // if d == 15 || n == 15 then UNPREDICTABLE;
- if (decodeRd(insn) == 15 || decodeRm(insn) == 15)
- return false;
-
- const MCInstrDesc &MCID = ARMInsts[Opcode];
- NumOpsAdded = MCID.getNumOperands() - 2; // ignore predicate operands
-
- // Disassemble register def.
- MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID,
- decodeRd(insn))));
-
- unsigned Pos = slice(insn, 20, 16);
- if (Opcode == ARM::SSAT || Opcode == ARM::SSAT16)
- Pos += 1;
- MI.addOperand(MCOperand::CreateImm(Pos));
-
- MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID,
- decodeRm(insn))));
-
- if (NumOpsAdded == 4) {
- ARM_AM::ShiftOpc Opc = (slice(insn, 6, 6) != 0 ? ARM_AM::asr : ARM_AM::lsl);
- // Inst{11-7} encodes the imm5 shift amount.
- unsigned ShAmt = slice(insn, 11, 7);
- if (ShAmt == 0) {
- // A8.6.183. Possible ASR shift amount of 32...
- if (Opc == ARM_AM::asr)
- ShAmt = 32;
- else
- Opc = ARM_AM::no_shift;
- }
- MI.addOperand(MCOperand::CreateImm(ARM_AM::getSORegOpc(Opc, ShAmt)));
- }
- return true;
-}
-
-// Extend instructions.
-// SXT* and UXT*: Rd [Rn] Rm [rot_imm].
-// The 2nd operand register is Rn and the 3rd operand regsiter is Rm for the
-// three register operand form. Otherwise, Rn=0b1111 and only Rm is used.
-static bool DisassembleExtFrm(MCInst &MI, unsigned Opcode, uint32_t insn,
- unsigned short NumOps, unsigned &NumOpsAdded, BO B) {
-
- // A8.6.220 SXTAB
- // if d == 15 || m == 15 then UNPREDICTABLE;
- if (decodeRd(insn) == 15 || decodeRm(insn) == 15)
- return false;
-
- const MCOperandInfo *OpInfo = ARMInsts[Opcode].OpInfo;
- unsigned &OpIdx = NumOpsAdded;
-
- OpIdx = 0;
-
- assert(NumOps >= 2
- && OpInfo[0].RegClass == ARM::GPRRegClassID
- && OpInfo[1].RegClass == ARM::GPRRegClassID
- && "Expect 2 reg operands");
-
- bool ThreeReg = NumOps > 2 && OpInfo[2].RegClass == ARM::GPRRegClassID;
-
- MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID,
- decodeRd(insn))));
- ++OpIdx;
-
- if (ThreeReg) {
- MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID,
- decodeRn(insn))));
- ++OpIdx;
- }
-
- MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID,
- decodeRm(insn))));
- ++OpIdx;
-
- // If there is still an operand info left which is an immediate operand, add
- // an additional rotate immediate operand.
- if (OpIdx < NumOps && OpInfo[OpIdx].RegClass < 0
- && !OpInfo[OpIdx].isPredicate() && !OpInfo[OpIdx].isOptionalDef()) {
- // Extract the 2-bit rotate field Inst{11-10}.
- unsigned rot = (insn >> ARMII::ExtRotImmShift) & 3;
- // Rotation by 8, 16, or 24 bits.
- MI.addOperand(MCOperand::CreateImm(rot << 3));
- ++OpIdx;
- }
-
- return true;
-}
-
-/////////////////////////////////////
-// //
-// Utility Functions For VFP //
-// //
-/////////////////////////////////////
-
-// Extract/Decode Dd/Sd:
-//
-// SP => d = UInt(Vd:D)
-// DP => d = UInt(D:Vd)
-static unsigned decodeVFPRd(uint32_t insn, bool isSPVFP) {
- return isSPVFP ? (decodeRd(insn) << 1 | getDBit(insn))
- : (decodeRd(insn) | getDBit(insn) << 4);
-}
-
-// Extract/Decode Dn/Sn:
-//
-// SP => n = UInt(Vn:N)
-// DP => n = UInt(N:Vn)
-static unsigned decodeVFPRn(uint32_t insn, bool isSPVFP) {
- return isSPVFP ? (decodeRn(insn) << 1 | getNBit(insn))
- : (decodeRn(insn) | getNBit(insn) << 4);
-}
-
-// Extract/Decode Dm/Sm:
-//
-// SP => m = UInt(Vm:M)
-// DP => m = UInt(M:Vm)
-static unsigned decodeVFPRm(uint32_t insn, bool isSPVFP) {
- return isSPVFP ? (decodeRm(insn) << 1 | getMBit(insn))
- : (decodeRm(insn) | getMBit(insn) << 4);
-}
-
-// A7.5.1
-static APInt VFPExpandImm(unsigned char byte, unsigned N) {
- assert(N == 32 || N == 64);
-
- uint64_t Result;
- unsigned bit6 = slice(byte, 6, 6);
- if (N == 32) {
- Result = slice(byte, 7, 7) << 31 | slice(byte, 5, 0) << 19;
- if (bit6)
- Result |= 0x1f << 25;
- else
- Result |= 0x1 << 30;
- } else {
- Result = (uint64_t)slice(byte, 7, 7) << 63 |
- (uint64_t)slice(byte, 5, 0) << 48;
- if (bit6)
- Result |= 0xffULL << 54;
- else
- Result |= 0x1ULL << 62;
- }
- return APInt(N, Result);
-}
-
-// VFP Unary Format Instructions:
-//
-// VCMP[E]ZD, VCMP[E]ZS: compares one floating-point register with zero
-// VCVTDS, VCVTSD: converts between double-precision and single-precision
-// The rest of the instructions have homogeneous [VFP]Rd and [VFP]Rm registers.
-static bool DisassembleVFPUnaryFrm(MCInst &MI, unsigned Opcode, uint32_t insn,
- unsigned short NumOps, unsigned &NumOpsAdded, BO B) {
-
- assert(NumOps >= 1 && "VFPUnaryFrm expects NumOps >= 1");
-
- const MCOperandInfo *OpInfo = ARMInsts[Opcode].OpInfo;
- unsigned &OpIdx = NumOpsAdded;
-
- OpIdx = 0;
-
- unsigned RegClass = OpInfo[OpIdx].RegClass;
- assert((RegClass == ARM::SPRRegClassID || RegClass == ARM::DPRRegClassID) &&
- "Reg operand expected");
- bool isSP = (RegClass == ARM::SPRRegClassID);
-
- MI.addOperand(MCOperand::CreateReg(
- getRegisterEnum(B, RegClass, decodeVFPRd(insn, isSP))));
- ++OpIdx;
-
- // Early return for compare with zero instructions.
- if (Opcode == ARM::VCMPEZD || Opcode == ARM::VCMPEZS
- || Opcode == ARM::VCMPZD || Opcode == ARM::VCMPZS)
- return true;
-
- RegClass = OpInfo[OpIdx].RegClass;
- assert((RegClass == ARM::SPRRegClassID || RegClass == ARM::DPRRegClassID) &&
- "Reg operand expected");
- isSP = (RegClass == ARM::SPRRegClassID);
-
- MI.addOperand(MCOperand::CreateReg(
- getRegisterEnum(B, RegClass, decodeVFPRm(insn, isSP))));
- ++OpIdx;
-
- return true;
-}
-
-// All the instructions have homogeneous [VFP]Rd, [VFP]Rn, and [VFP]Rm regs.
-// Some of them have operand constraints which tie the first operand in the
-// InOperandList to that of the dst. As far as asm printing is concerned, this
-// tied_to operand is simply skipped.
-static bool DisassembleVFPBinaryFrm(MCInst &MI, unsigned Opcode, uint32_t insn,
- unsigned short NumOps, unsigned &NumOpsAdded, BO B) {
-
- assert(NumOps >= 3 && "VFPBinaryFrm expects NumOps >= 3");
-
- const MCInstrDesc &MCID = ARMInsts[Opcode];
- const MCOperandInfo *OpInfo = MCID.OpInfo;
- unsigned &OpIdx = NumOpsAdded;
-
- OpIdx = 0;
-
- unsigned RegClass = OpInfo[OpIdx].RegClass;
- assert((RegClass == ARM::SPRRegClassID || RegClass == ARM::DPRRegClassID) &&
- "Reg operand expected");
- bool isSP = (RegClass == ARM::SPRRegClassID);
-
- MI.addOperand(MCOperand::CreateReg(
- getRegisterEnum(B, RegClass, decodeVFPRd(insn, isSP))));
- ++OpIdx;
-
- // Skip tied_to operand constraint.
- if (MCID.getOperandConstraint(OpIdx, MCOI::TIED_TO) != -1) {
- assert(NumOps >= 4 && "Expect >=4 operands");
- MI.addOperand(MCOperand::CreateReg(0));
- ++OpIdx;
- }
-
- MI.addOperand(MCOperand::CreateReg(
- getRegisterEnum(B, RegClass, decodeVFPRn(insn, isSP))));
- ++OpIdx;
-
- MI.addOperand(MCOperand::CreateReg(
- getRegisterEnum(B, RegClass, decodeVFPRm(insn, isSP))));
- ++OpIdx;
-
- return true;
-}
-
-// A8.6.295 vcvt (floating-point <-> integer)
-// Int to FP: VSITOD, VSITOS, VUITOD, VUITOS
-// FP to Int: VTOSI[Z|R]D, VTOSI[Z|R]S, VTOUI[Z|R]D, VTOUI[Z|R]S
-//
-// A8.6.297 vcvt (floating-point and fixed-point)
-// Dd|Sd Dd|Sd(TIED_TO) #fbits(= 16|32 - UInt(imm4:i))
-static bool DisassembleVFPConv1Frm(MCInst &MI, unsigned Opcode, uint32_t insn,
- unsigned short NumOps, unsigned &NumOpsAdded, BO B) {
-
- assert(NumOps >= 2 && "VFPConv1Frm expects NumOps >= 2");
-
- const MCInstrDesc &MCID = ARMInsts[Opcode];
- const MCOperandInfo *OpInfo = MCID.OpInfo;
- if (!OpInfo) return false;
-
- bool SP = slice(insn, 8, 8) == 0; // A8.6.295 & A8.6.297
- bool fixed_point = slice(insn, 17, 17) == 1; // A8.6.297
- unsigned RegClassID = SP ? ARM::SPRRegClassID : ARM::DPRRegClassID;
-
- if (fixed_point) {
- // A8.6.297
- assert(NumOps >= 3 && "Expect >= 3 operands");
- int size = slice(insn, 7, 7) == 0 ? 16 : 32;
- int fbits = size - (slice(insn,3,0) << 1 | slice(insn,5,5));
- MI.addOperand(MCOperand::CreateReg(
- getRegisterEnum(B, RegClassID,
- decodeVFPRd(insn, SP))));
-
- assert(MCID.getOperandConstraint(1, MCOI::TIED_TO) != -1 &&
- "Tied to operand expected");
- MI.addOperand(MI.getOperand(0));
-
- assert(OpInfo[2].RegClass < 0 && !OpInfo[2].isPredicate() &&
- !OpInfo[2].isOptionalDef() && "Imm operand expected");
- MI.addOperand(MCOperand::CreateImm(fbits));
-
- NumOpsAdded = 3;
- } else {
- // A8.6.295
- // The Rd (destination) and Rm (source) bits have different interpretations
- // depending on their single-precisonness.
- unsigned d, m;
- if (slice(insn, 18, 18) == 1) { // to_integer operation
- d = decodeVFPRd(insn, true /* Is Single Precision */);
- MI.addOperand(MCOperand::CreateReg(
- getRegisterEnum(B, ARM::SPRRegClassID, d)));
- m = decodeVFPRm(insn, SP);
- MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, RegClassID, m)));
- } else {
- d = decodeVFPRd(insn, SP);
- MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, RegClassID, d)));
- m = decodeVFPRm(insn, true /* Is Single Precision */);
- MI.addOperand(MCOperand::CreateReg(
- getRegisterEnum(B, ARM::SPRRegClassID, m)));
- }
- NumOpsAdded = 2;
- }
-
- return true;
-}
-
-// VMOVRS - A8.6.330
-// Rt => Rd; Sn => UInt(Vn:N)
-static bool DisassembleVFPConv2Frm(MCInst &MI, unsigned Opcode, uint32_t insn,
- unsigned short NumOps, unsigned &NumOpsAdded, BO B) {
-
- assert(NumOps >= 2 && "VFPConv2Frm expects NumOps >= 2");
-
- MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID,
- decodeRd(insn))));
- MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::SPRRegClassID,
- decodeVFPRn(insn, true))));
- NumOpsAdded = 2;
- return true;
-}
-
-// VMOVRRD - A8.6.332
-// Rt => Rd; Rt2 => Rn; Dm => UInt(M:Vm)
-//
-// VMOVRRS - A8.6.331
-// Rt => Rd; Rt2 => Rn; Sm => UInt(Vm:M); Sm1 = Sm+1
-static bool DisassembleVFPConv3Frm(MCInst &MI, unsigned Opcode, uint32_t insn,
- unsigned short NumOps, unsigned &NumOpsAdded, BO B) {
-
- assert(NumOps >= 3 && "VFPConv3Frm expects NumOps >= 3");
-
- const MCOperandInfo *OpInfo = ARMInsts[Opcode].OpInfo;
- unsigned &OpIdx = NumOpsAdded;
-
- MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID,
- decodeRd(insn))));
- MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID,
- decodeRn(insn))));
- OpIdx = 2;
-
- if (OpInfo[OpIdx].RegClass == ARM::SPRRegClassID) {
- unsigned Sm = decodeVFPRm(insn, true);
- MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::SPRRegClassID,
- Sm)));
- MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::SPRRegClassID,
- Sm+1)));
- OpIdx += 2;
- } else {
- MI.addOperand(MCOperand::CreateReg(
- getRegisterEnum(B, ARM::DPRRegClassID,
- decodeVFPRm(insn, false))));
- ++OpIdx;
- }
- return true;
-}
-
-// VMOVSR - A8.6.330
-// Rt => Rd; Sn => UInt(Vn:N)
-static bool DisassembleVFPConv4Frm(MCInst &MI, unsigned Opcode, uint32_t insn,
- unsigned short NumOps, unsigned &NumOpsAdded, BO B) {
-
- assert(NumOps >= 2 && "VFPConv4Frm expects NumOps >= 2");
-
- MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::SPRRegClassID,
- decodeVFPRn(insn, true))));
- MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID,
- decodeRd(insn))));
- NumOpsAdded = 2;
- return true;
-}
-
-// VMOVDRR - A8.6.332
-// Rt => Rd; Rt2 => Rn; Dm => UInt(M:Vm)
-//
-// VMOVRRS - A8.6.331
-// Rt => Rd; Rt2 => Rn; Sm => UInt(Vm:M); Sm1 = Sm+1
-static bool DisassembleVFPConv5Frm(MCInst &MI, unsigned Opcode, uint32_t insn,
- unsigned short NumOps, unsigned &NumOpsAdded, BO B) {
-
- assert(NumOps >= 3 && "VFPConv5Frm expects NumOps >= 3");
-
- const MCOperandInfo *OpInfo = ARMInsts[Opcode].OpInfo;
- unsigned &OpIdx = NumOpsAdded;
-
- OpIdx = 0;
-
- if (OpInfo[OpIdx].RegClass == ARM::SPRRegClassID) {
- unsigned Sm = decodeVFPRm(insn, true);
- MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::SPRRegClassID,
- Sm)));
- MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::SPRRegClassID,
- Sm+1)));
- OpIdx += 2;
- } else {
- MI.addOperand(MCOperand::CreateReg(
- getRegisterEnum(B, ARM::DPRRegClassID,
- decodeVFPRm(insn, false))));
- ++OpIdx;
- }
-
- MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID,
- decodeRd(insn))));
- MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID,
- decodeRn(insn))));
- OpIdx += 2;
- return true;
-}
-
-// VFP Load/Store Instructions.
-// VLDRD, VLDRS, VSTRD, VSTRS
-static bool DisassembleVFPLdStFrm(MCInst &MI, unsigned Opcode, uint32_t insn,
- unsigned short NumOps, unsigned &NumOpsAdded, BO B) {
-
- assert(NumOps >= 3 && "VFPLdStFrm expects NumOps >= 3");
-
- bool isSPVFP = (Opcode == ARM::VLDRS || Opcode == ARM::VSTRS);
- unsigned RegClassID = isSPVFP ? ARM::SPRRegClassID : ARM::DPRRegClassID;
-
- // Extract Dd/Sd for operand 0.
- unsigned RegD = decodeVFPRd(insn, isSPVFP);
-
- MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, RegClassID, RegD)));
-
- unsigned Base = getRegisterEnum(B, ARM::GPRRegClassID, decodeRn(insn));
- MI.addOperand(MCOperand::CreateReg(Base));
-
- // Next comes the AM5 Opcode.
- ARM_AM::AddrOpc AddrOpcode = getUBit(insn) ? ARM_AM::add : ARM_AM::sub;
- unsigned char Imm8 = insn & 0xFF;
- MI.addOperand(MCOperand::CreateImm(ARM_AM::getAM5Opc(AddrOpcode, Imm8)));
-
- NumOpsAdded = 3;
-
- return true;
-}
-
-// VFP Load/Store Multiple Instructions.
-// We have an optional write back reg, the base, and two predicate operands.
-// It is then followed by a reglist of either DPR(s) or SPR(s).
-//
-// VLDMD[_UPD], VLDMS[_UPD], VSTMD[_UPD], VSTMS[_UPD]
-static bool DisassembleVFPLdStMulFrm(MCInst &MI, unsigned Opcode, uint32_t insn,
- unsigned short NumOps, unsigned &NumOpsAdded, BO B) {
-
- assert(NumOps >= 4 && "VFPLdStMulFrm expects NumOps >= 4");
-
- unsigned &OpIdx = NumOpsAdded;
-
- OpIdx = 0;
-
- unsigned Base = getRegisterEnum(B, ARM::GPRRegClassID, decodeRn(insn));
-
- // Writeback to base, if necessary.
- if (Opcode == ARM::VLDMDIA_UPD || Opcode == ARM::VLDMSIA_UPD ||
- Opcode == ARM::VLDMDDB_UPD || Opcode == ARM::VLDMSDB_UPD ||
- Opcode == ARM::VSTMDIA_UPD || Opcode == ARM::VSTMSIA_UPD ||
- Opcode == ARM::VSTMDDB_UPD || Opcode == ARM::VSTMSDB_UPD) {
- MI.addOperand(MCOperand::CreateReg(Base));
- ++OpIdx;
- }
-
- MI.addOperand(MCOperand::CreateReg(Base));
-
- // Handling the two predicate operands before the reglist.
- int64_t CondVal = getCondField(insn);
- if (CondVal == 0xF)
- return false;
- MI.addOperand(MCOperand::CreateImm(CondVal));
- MI.addOperand(MCOperand::CreateReg(ARM::CPSR));
-
- OpIdx += 3;
-
- bool isSPVFP = (Opcode == ARM::VLDMSIA ||
- Opcode == ARM::VLDMSIA_UPD || Opcode == ARM::VLDMSDB_UPD ||
- Opcode == ARM::VSTMSIA ||
- Opcode == ARM::VSTMSIA_UPD || Opcode == ARM::VSTMSDB_UPD);
- unsigned RegClassID = isSPVFP ? ARM::SPRRegClassID : ARM::DPRRegClassID;
-
- // Extract Dd/Sd.
- unsigned RegD = decodeVFPRd(insn, isSPVFP);
-
- // Fill the variadic part of reglist.
- unsigned char Imm8 = insn & 0xFF;
- unsigned Regs = isSPVFP ? Imm8 : Imm8/2;
-
- // Apply some sanity checks before proceeding.
- if (Regs == 0 || (RegD + Regs) > 32 || (!isSPVFP && Regs > 16))
- return false;
-
- for (unsigned i = 0; i < Regs; ++i) {
- MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, RegClassID,
- RegD + i)));
- ++OpIdx;
- }
-
- return true;
-}
-
-// Misc. VFP Instructions.
-// FMSTAT (vmrs with Rt=0b1111, i.e., to apsr_nzcv and no register operand)
-// FCONSTD (DPR and a VFPf64Imm operand)
-// FCONSTS (SPR and a VFPf32Imm operand)
-// VMRS/VMSR (GPR operand)
-static bool DisassembleVFPMiscFrm(MCInst &MI, unsigned Opcode, uint32_t insn,
- unsigned short NumOps, unsigned &NumOpsAdded, BO B) {
-
- const MCOperandInfo *OpInfo = ARMInsts[Opcode].OpInfo;
- unsigned &OpIdx = NumOpsAdded;
-
- OpIdx = 0;
-
- if (Opcode == ARM::FMSTAT)
- return true;
-
- assert(NumOps >= 2 && "VFPMiscFrm expects >=2 operands");
-
- unsigned RegEnum = 0;
- switch (OpInfo[0].RegClass) {
- case ARM::DPRRegClassID:
- RegEnum = getRegisterEnum(B, ARM::DPRRegClassID, decodeVFPRd(insn, false));
- break;
- case ARM::SPRRegClassID:
- RegEnum = getRegisterEnum(B, ARM::SPRRegClassID, decodeVFPRd(insn, true));
- break;
- case ARM::GPRRegClassID:
- RegEnum = getRegisterEnum(B, ARM::GPRRegClassID, decodeRd(insn));
- break;
- default:
- assert(0 && "Invalid reg class id");
- return false;
- }
-
- MI.addOperand(MCOperand::CreateReg(RegEnum));
- ++OpIdx;
-
- // Extract/decode the f64/f32 immediate.
- if (OpIdx < NumOps && OpInfo[OpIdx].RegClass < 0
- && !OpInfo[OpIdx].isPredicate() && !OpInfo[OpIdx].isOptionalDef()) {
- // The asm syntax specifies the floating point value, not the 8-bit literal.
- APInt immRaw = VFPExpandImm(slice(insn,19,16) << 4 | slice(insn, 3, 0),
- Opcode == ARM::FCONSTD ? 64 : 32);
- APFloat immFP = APFloat(immRaw, true);
- double imm = Opcode == ARM::FCONSTD ? immFP.convertToDouble() :
- immFP.convertToFloat();
- MI.addOperand(MCOperand::CreateFPImm(imm));
-
- ++OpIdx;
- }
-
- return true;
-}
-
-// DisassembleThumbFrm() is defined in ThumbDisassemblerCore.h file.
-#include "ThumbDisassemblerCore.h"
-
-/////////////////////////////////////////////////////
-// //
-// Utility Functions For ARM Advanced SIMD //
-// //
-/////////////////////////////////////////////////////
-
-// The following NEON namings are based on A8.6.266 VABA, VABAL. Notice that
-// A8.6.303 VDUP (ARM core register)'s D/Vd pair is the N/Vn pair of VABA/VABAL.
-
-// A7.3 Register encoding
-
-// Extract/Decode NEON D/Vd:
-//
-// Note that for quadword, Qd = UInt(D:Vd<3:1>) = Inst{22:15-13}, whereas for
-// doubleword, Dd = UInt(D:Vd). We compensate for this difference by
-// handling it in the getRegisterEnum() utility function.
-// D = Inst{22}, Vd = Inst{15-12}
-static unsigned decodeNEONRd(uint32_t insn) {
- return ((insn >> ARMII::NEON_D_BitShift) & 1) << 4
- | ((insn >> ARMII::NEON_RegRdShift) & ARMII::NEONRegMask);
-}
-
-// Extract/Decode NEON N/Vn:
-//
-// Note that for quadword, Qn = UInt(N:Vn<3:1>) = Inst{7:19-17}, whereas for
-// doubleword, Dn = UInt(N:Vn). We compensate for this difference by
-// handling it in the getRegisterEnum() utility function.
-// N = Inst{7}, Vn = Inst{19-16}
-static unsigned decodeNEONRn(uint32_t insn) {
- return ((insn >> ARMII::NEON_N_BitShift) & 1) << 4
- | ((insn >> ARMII::NEON_RegRnShift) & ARMII::NEONRegMask);
-}
-
-// Extract/Decode NEON M/Vm:
-//
-// Note that for quadword, Qm = UInt(M:Vm<3:1>) = Inst{5:3-1}, whereas for
-// doubleword, Dm = UInt(M:Vm). We compensate for this difference by
-// handling it in the getRegisterEnum() utility function.
-// M = Inst{5}, Vm = Inst{3-0}
-static unsigned decodeNEONRm(uint32_t insn) {
- return ((insn >> ARMII::NEON_M_BitShift) & 1) << 4
- | ((insn >> ARMII::NEON_RegRmShift) & ARMII::NEONRegMask);
-}
-
-namespace {
-enum ElemSize {
- ESizeNA = 0,
- ESize8 = 8,
- ESize16 = 16,
- ESize32 = 32,
- ESize64 = 64
-};
-} // End of unnamed namespace
-
-// size field -> Inst{11-10}
-// index_align field -> Inst{7-4}
-//
-// The Lane Index interpretation depends on the Data Size:
-// 8 (encoded as size = 0b00) -> Index = index_align[3:1]
-// 16 (encoded as size = 0b01) -> Index = index_align[3:2]
-// 32 (encoded as size = 0b10) -> Index = index_align[3]
-//
-// Ref: A8.6.317 VLD4 (single 4-element structure to one lane).
-static unsigned decodeLaneIndex(uint32_t insn) {
- unsigned size = insn >> 10 & 3;
- assert((size == 0 || size == 1 || size == 2) &&
- "Encoding error: size should be either 0, 1, or 2");
-
- unsigned index_align = insn >> 4 & 0xF;
- return (index_align >> 1) >> size;
-}
-
-// imm64 = AdvSIMDExpandImm(op, cmode, i:imm3:imm4)
-// op = Inst{5}, cmode = Inst{11-8}
-// i = Inst{24} (ARM architecture)
-// imm3 = Inst{18-16}, imm4 = Inst{3-0}
-// Ref: Table A7-15 Modified immediate values for Advanced SIMD instructions.
-static uint64_t decodeN1VImm(uint32_t insn, ElemSize esize) {
- unsigned char op = (insn >> 5) & 1;
- unsigned char cmode = (insn >> 8) & 0xF;
- unsigned char Imm8 = ((insn >> 24) & 1) << 7 |
- ((insn >> 16) & 7) << 4 |
- (insn & 0xF);
- return (op << 12) | (cmode << 8) | Imm8;
-}
-
-// A8.6.339 VMUL, VMULL (by scalar)
-// ESize16 => m = Inst{2-0} (Vm<2:0>) D0-D7
-// ESize32 => m = Inst{3-0} (Vm<3:0>) D0-D15
-static unsigned decodeRestrictedDm(uint32_t insn, ElemSize esize) {
- switch (esize) {
- case ESize16:
- return insn & 7;
- case ESize32:
- return insn & 0xF;
- default:
- assert(0 && "Unreachable code!");
- return 0;
- }
-}
-
-// A8.6.339 VMUL, VMULL (by scalar)
-// ESize16 => index = Inst{5:3} (M:Vm<3>) D0-D7
-// ESize32 => index = Inst{5} (M) D0-D15
-static unsigned decodeRestrictedDmIndex(uint32_t insn, ElemSize esize) {
- switch (esize) {
- case ESize16:
- return (((insn >> 5) & 1) << 1) | ((insn >> 3) & 1);
- case ESize32:
- return (insn >> 5) & 1;
- default:
- assert(0 && "Unreachable code!");
- return 0;
- }
-}
-
-// A8.6.296 VCVT (between floating-point and fixed-point, Advanced SIMD)
-// (64 - <fbits>) is encoded as imm6, i.e., Inst{21-16}.
-static unsigned decodeVCVTFractionBits(uint32_t insn) {
- return 64 - ((insn >> 16) & 0x3F);
-}
-
-// A8.6.302 VDUP (scalar)
-// ESize8 => index = Inst{19-17}
-// ESize16 => index = Inst{19-18}
-// ESize32 => index = Inst{19}
-static unsigned decodeNVLaneDupIndex(uint32_t insn, ElemSize esize) {
- switch (esize) {
- case ESize8:
- return (insn >> 17) & 7;
- case ESize16:
- return (insn >> 18) & 3;
- case ESize32:
- return (insn >> 19) & 1;
- default:
- assert(0 && "Unspecified element size!");
- return 0;
- }
-}
-
-// A8.6.328 VMOV (ARM core register to scalar)
-// A8.6.329 VMOV (scalar to ARM core register)
-// ESize8 => index = Inst{21:6-5}
-// ESize16 => index = Inst{21:6}
-// ESize32 => index = Inst{21}
-static unsigned decodeNVLaneOpIndex(uint32_t insn, ElemSize esize) {
- switch (esize) {
- case ESize8:
- return ((insn >> 21) & 1) << 2 | ((insn >> 5) & 3);
- case ESize16:
- return ((insn >> 21) & 1) << 1 | ((insn >> 6) & 1);
- case ESize32:
- return ((insn >> 21) & 1);
- default:
- assert(0 && "Unspecified element size!");
- return 0;
- }
-}
-
-// Imm6 = Inst{21-16}, L = Inst{7}
-//
-// LeftShift == true (A8.6.367 VQSHL, A8.6.387 VSLI):
-// case L:imm6 of
-// '0001xxx' => esize = 8; shift_amount = imm6 - 8
-// '001xxxx' => esize = 16; shift_amount = imm6 - 16
-// '01xxxxx' => esize = 32; shift_amount = imm6 - 32
-// '1xxxxxx' => esize = 64; shift_amount = imm6
-//
-// LeftShift == false (A8.6.376 VRSHR, A8.6.368 VQSHRN):
-// case L:imm6 of
-// '0001xxx' => esize = 8; shift_amount = 16 - imm6
-// '001xxxx' => esize = 16; shift_amount = 32 - imm6
-// '01xxxxx' => esize = 32; shift_amount = 64 - imm6
-// '1xxxxxx' => esize = 64; shift_amount = 64 - imm6
-//
-static unsigned decodeNVSAmt(uint32_t insn, bool LeftShift) {
- ElemSize esize = ESizeNA;
- unsigned L = (insn >> 7) & 1;
- unsigned imm6 = (insn >> 16) & 0x3F;
- if (L == 0) {
- if (imm6 >> 3 == 1)
- esize = ESize8;
- else if (imm6 >> 4 == 1)
- esize = ESize16;
- else if (imm6 >> 5 == 1)
- esize = ESize32;
- else
- assert(0 && "Wrong encoding of Inst{7:21-16}!");
- } else
- esize = ESize64;
-
- if (LeftShift)
- return esize == ESize64 ? imm6 : (imm6 - esize);
- else
- return esize == ESize64 ? (esize - imm6) : (2*esize - imm6);
-}
-
-// A8.6.305 VEXT
-// Imm4 = Inst{11-8}
-static unsigned decodeN3VImm(uint32_t insn) {
- return (insn >> 8) & 0xF;
-}
-
-// VLD*
-// D[d] D[d2] ... Rn [TIED_TO Rn] align [Rm]
-// VLD*LN*
-// D[d] D[d2] ... Rn [TIED_TO Rn] align [Rm] TIED_TO ... imm(idx)
-// VST*
-// Rn [TIED_TO Rn] align [Rm] D[d] D[d2] ...
-// VST*LN*
-// Rn [TIED_TO Rn] align [Rm] D[d] D[d2] ... [imm(idx)]
-//
-// Correctly set VLD*/VST*'s TIED_TO GPR, as the asm printer needs it.
-static bool DisassembleNLdSt0(MCInst &MI, unsigned Opcode, uint32_t insn,
- unsigned short NumOps, unsigned &NumOpsAdded, bool Store, bool DblSpaced,
- unsigned alignment, BO B) {
-
- const MCInstrDesc &MCID = ARMInsts[Opcode];
- const MCOperandInfo *OpInfo = MCID.OpInfo;
-
- // At least one DPR register plus addressing mode #6.
- assert(NumOps >= 3 && "Expect >= 3 operands");
-
- unsigned &OpIdx = NumOpsAdded;
-
- OpIdx = 0;
-
- // We have homogeneous NEON registers for Load/Store.
- unsigned RegClass = 0;
-
- // Double-spaced registers have increments of 2.
- unsigned Inc = DblSpaced ? 2 : 1;
-
- unsigned Rn = decodeRn(insn);
- unsigned Rm = decodeRm(insn);
- unsigned Rd = decodeNEONRd(insn);
-
- // A7.7.1 Advanced SIMD addressing mode.
- bool WB = Rm != 15;
-
- // LLVM Addressing Mode #6.
- unsigned RmEnum = 0;
- if (WB && Rm != 13)
- RmEnum = getRegisterEnum(B, ARM::GPRRegClassID, Rm);
-
- if (Store) {
- // Consume possible WB, AddrMode6, possible increment reg, the DPR/QPR's,
- // then possible lane index.
- assert(OpIdx < NumOps && OpInfo[0].RegClass == ARM::GPRRegClassID &&
- "Reg operand expected");
-
- if (WB) {
- MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID,
- Rn)));
- ++OpIdx;
- }
-
- assert((OpIdx+1) < NumOps && OpInfo[OpIdx].RegClass == ARM::GPRRegClassID &&
- OpInfo[OpIdx + 1].RegClass < 0 && "Addrmode #6 Operands expected");
- // addrmode6 := (ops GPR:$addr, i32imm)
- MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID,
- Rn)));
- MI.addOperand(MCOperand::CreateImm(alignment)); // Alignment
- OpIdx += 2;
-
- if (WB) {
- MI.addOperand(MCOperand::CreateReg(RmEnum));
- ++OpIdx;
- }
-
- assert(OpIdx < NumOps &&
- (OpInfo[OpIdx].RegClass == ARM::DPRRegClassID ||
- OpInfo[OpIdx].RegClass == ARM::QPRRegClassID) &&
- "Reg operand expected");
-
- RegClass = OpInfo[OpIdx].RegClass;
- while (OpIdx < NumOps && (unsigned)OpInfo[OpIdx].RegClass == RegClass) {
- MI.addOperand(MCOperand::CreateReg(
- getRegisterEnum(B, RegClass, Rd)));
- Rd += Inc;
- ++OpIdx;
- }
-
- // Handle possible lane index.
- if (OpIdx < NumOps && OpInfo[OpIdx].RegClass < 0
- && !OpInfo[OpIdx].isPredicate() && !OpInfo[OpIdx].isOptionalDef()) {
- MI.addOperand(MCOperand::CreateImm(decodeLaneIndex(insn)));
- ++OpIdx;
- }
-
- } else {
- // Consume the DPR/QPR's, possible WB, AddrMode6, possible incrment reg,
- // possible TIED_TO DPR/QPR's (ignored), then possible lane index.
- RegClass = OpInfo[0].RegClass;
-
- while (OpIdx < NumOps && (unsigned)OpInfo[OpIdx].RegClass == RegClass) {
- MI.addOperand(MCOperand::CreateReg(
- getRegisterEnum(B, RegClass, Rd)));
- Rd += Inc;
- ++OpIdx;
- }
-
- if (WB) {
- MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID,
- Rn)));
- ++OpIdx;
- }
-
- assert((OpIdx+1) < NumOps && OpInfo[OpIdx].RegClass == ARM::GPRRegClassID &&
- OpInfo[OpIdx + 1].RegClass < 0 && "Addrmode #6 Operands expected");
- // addrmode6 := (ops GPR:$addr, i32imm)
- MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID,
- Rn)));
- MI.addOperand(MCOperand::CreateImm(alignment)); // Alignment
- OpIdx += 2;
-
- if (WB) {
- MI.addOperand(MCOperand::CreateReg(RmEnum));
- ++OpIdx;
- }
-
- while (OpIdx < NumOps && (unsigned)OpInfo[OpIdx].RegClass == RegClass) {
- assert(MCID.getOperandConstraint(OpIdx, MCOI::TIED_TO) != -1 &&
- "Tied to operand expected");
- MI.addOperand(MCOperand::CreateReg(0));
- ++OpIdx;
- }
-
- // Handle possible lane index.
- if (OpIdx < NumOps && OpInfo[OpIdx].RegClass < 0
- && !OpInfo[OpIdx].isPredicate() && !OpInfo[OpIdx].isOptionalDef()) {
- MI.addOperand(MCOperand::CreateImm(decodeLaneIndex(insn)));
- ++OpIdx;
- }
- }
-
- // Accessing registers past the end of the NEON register file is not
- // defined.
- if (Rd > 32)
- return false;
-
- return true;
-}
-
-// A8.6.308, A8.6.311, A8.6.314, A8.6.317.
-static bool Align4OneLaneInst(unsigned elem, unsigned size,
- unsigned index_align, unsigned & alignment) {
- unsigned bits = 0;
- switch (elem) {
- default:
- return false;
- case 1:
- // A8.6.308
- if (size == 0)
- return slice(index_align, 0, 0) == 0;
- else if (size == 1) {
- bits = slice(index_align, 1, 0);
- if (bits != 0 && bits != 1)
- return false;
- if (bits == 1)
- alignment = 16;
- return true;
- } else if (size == 2) {
- bits = slice(index_align, 2, 0);
- if (bits != 0 && bits != 3)
- return false;
- if (bits == 3)
- alignment = 32;
- return true;;
- }
- return true;
- case 2:
- // A8.6.311
- if (size == 0) {
- if (slice(index_align, 0, 0) == 1)
- alignment = 16;
- return true;
- } if (size == 1) {
- if (slice(index_align, 0, 0) == 1)
- alignment = 32;
- return true;
- } else if (size == 2) {
- if (slice(index_align, 1, 1) != 0)
- return false;
- if (slice(index_align, 0, 0) == 1)
- alignment = 64;
- return true;;
- }
- return true;
- case 3:
- // A8.6.314
- if (size == 0) {
- if (slice(index_align, 0, 0) != 0)
- return false;
- return true;
- } if (size == 1) {
- if (slice(index_align, 0, 0) != 0)
- return false;
- return true;
- return true;
- } else if (size == 2) {
- if (slice(index_align, 1, 0) != 0)
- return false;
- return true;;
- }
- return true;
- case 4:
- // A8.6.317
- if (size == 0) {
- if (slice(index_align, 0, 0) == 1)
- alignment = 32;
- return true;
- } if (size == 1) {
- if (slice(index_align, 0, 0) == 1)
- alignment = 64;
- return true;
- } else if (size == 2) {
- bits = slice(index_align, 1, 0);
- if (bits == 3)
- return false;
- if (bits == 1)
- alignment = 64;
- else if (bits == 2)
- alignment = 128;
- return true;;
- }
- return true;
- }
-}
-
-// A7.7
-// If L (Inst{21}) == 0, store instructions.
-// Find out about double-spaced-ness of the Opcode and pass it on to
-// DisassembleNLdSt0().
-static bool DisassembleNLdSt(MCInst &MI, unsigned Opcode, uint32_t insn,
- unsigned short NumOps, unsigned &NumOpsAdded, BO B) {
-
- const StringRef Name = ARMInsts[Opcode].Name;
- bool DblSpaced = false;
- // 0 represents standard alignment, i.e., unaligned data access.
- unsigned alignment = 0;
-
- unsigned elem = 0; // legal values: {1, 2, 3, 4}
- if (Name.startswith("VST1") || Name.startswith("VLD1"))
- elem = 1;
-
- if (Name.startswith("VST2") || Name.startswith("VLD2"))
- elem = 2;
-
- if (Name.startswith("VST3") || Name.startswith("VLD3"))
- elem = 3;
-
- if (Name.startswith("VST4") || Name.startswith("VLD4"))
- elem = 4;
-
- if (Name.find("LN") != std::string::npos) {
- // To one lane instructions.
- // See, for example, 8.6.317 VLD4 (single 4-element structure to one lane).
-
- // Utility function takes number of elements, size, and index_align.
- if (!Align4OneLaneInst(elem,
- slice(insn, 11, 10),
- slice(insn, 7, 4),
- alignment))
- return false;
-
- // <size> == 16 && Inst{5} == 1 --> DblSpaced = true
- if (Name.endswith("16") || Name.endswith("16_UPD"))
- DblSpaced = slice(insn, 5, 5) == 1;
-
- // <size> == 32 && Inst{6} == 1 --> DblSpaced = true
- if (Name.endswith("32") || Name.endswith("32_UPD"))
- DblSpaced = slice(insn, 6, 6) == 1;
- } else if (Name.find("DUP") != std::string::npos) {
- // Single element (or structure) to all lanes.
- // Inst{9-8} encodes the number of element(s) in the structure, with:
- // 0b00 (VLD1DUP) (for this, a bit makes sense only for data size 16 and 32.
- // 0b01 (VLD2DUP)
- // 0b10 (VLD3DUP) (for this, a bit must be encoded as 0)
- // 0b11 (VLD4DUP)
- //
- // Inst{7-6} encodes the data size, with:
- // 0b00 => 8, 0b01 => 16, 0b10 => 32
- //
- // Inst{4} (the a bit) encodes the align action (0: standard alignment)
- unsigned elem = slice(insn, 9, 8) + 1;
- unsigned a = slice(insn, 4, 4);
- if (elem != 3) {
- // 0b11 is not a valid encoding for Inst{7-6}.
- if (slice(insn, 7, 6) == 3)
- return false;
- unsigned data_size = 8 << slice(insn, 7, 6);
- // For VLD1DUP, a bit makes sense only for data size of 16 and 32.
- if (a && data_size == 8)
- return false;
-
- // Now we can calculate the alignment!
- if (a)
- alignment = elem * data_size;
- } else {
- if (a) {
- // A8.6.315 VLD3 (single 3-element structure to all lanes)
- // The a bit must be encoded as 0.
- return false;
- }
- }
- } else {
- // Multiple n-element structures with type encoded as Inst{11-8}.
- // See, for example, A8.6.316 VLD4 (multiple 4-element structures).
-
- // Inst{5-4} encodes alignment.
- unsigned align = slice(insn, 5, 4);
- switch (align) {
- default:
- break;
- case 1:
- alignment = 64; break;
- case 2:
- alignment = 128; break;
- case 3:
- alignment = 256; break;
- }
-
- unsigned type = slice(insn, 11, 8);
- // Reject UNDEFINED instructions based on type and align.
- // Plus set DblSpaced flag where appropriate.
- switch (elem) {
- default:
- break;
- case 1:
- // n == 1
- // A8.6.307 & A8.6.391
- if ((type == 7 && slice(align, 1, 1) == 1) ||
- (type == 10 && align == 3) ||
- (type == 6 && slice(align, 1, 1) == 1))
- return false;
- break;
- case 2:
- // n == 2 && type == 0b1001 -> DblSpaced = true
- // A8.6.310 & A8.6.393
- if ((type == 8 || type == 9) && align == 3)
- return false;
- DblSpaced = (type == 9);
- break;
- case 3:
- // n == 3 && type == 0b0101 -> DblSpaced = true
- // A8.6.313 & A8.6.395
- if (slice(insn, 7, 6) == 3 || slice(align, 1, 1) == 1)
- return false;
- DblSpaced = (type == 5);
- break;
- case 4:
- // n == 4 && type == 0b0001 -> DblSpaced = true
- // A8.6.316 & A8.6.397
- if (slice(insn, 7, 6) == 3)
- return false;
- DblSpaced = (type == 1);
- break;
- }
- }
- return DisassembleNLdSt0(MI, Opcode, insn, NumOps, NumOpsAdded,
- slice(insn, 21, 21) == 0, DblSpaced, alignment/8, B);
-}
-
-// VMOV (immediate)
-// Qd/Dd imm
-// VBIC (immediate)
-// VORR (immediate)
-// Qd/Dd imm src(=Qd/Dd)
-static bool DisassembleN1RegModImmFrm(MCInst &MI, unsigned Opcode,
- uint32_t insn, unsigned short NumOps, unsigned &NumOpsAdded, BO B) {
-
- const MCInstrDesc &MCID = ARMInsts[Opcode];
- const MCOperandInfo *OpInfo = MCID.OpInfo;
-
- assert(NumOps >= 2 &&
- (OpInfo[0].RegClass == ARM::DPRRegClassID ||
- OpInfo[0].RegClass == ARM::QPRRegClassID) &&
- (OpInfo[1].RegClass < 0) &&
- "Expect 1 reg operand followed by 1 imm operand");
-
- // Qd/Dd = Inst{22:15-12} => NEON Rd
- MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, OpInfo[0].RegClass,
- decodeNEONRd(insn))));
-
- ElemSize esize = ESizeNA;
- switch (Opcode) {
- case ARM::VMOVv8i8:
- case ARM::VMOVv16i8:
- esize = ESize8;
- break;
- case ARM::VMOVv4i16:
- case ARM::VMOVv8i16:
- case ARM::VMVNv4i16:
- case ARM::VMVNv8i16:
- case ARM::VBICiv4i16:
- case ARM::VBICiv8i16:
- case ARM::VORRiv4i16:
- case ARM::VORRiv8i16:
- esize = ESize16;
- break;
- case ARM::VMOVv2i32:
- case ARM::VMOVv4i32:
- case ARM::VMVNv2i32:
- case ARM::VMVNv4i32:
- case ARM::VBICiv2i32:
- case ARM::VBICiv4i32:
- case ARM::VORRiv2i32:
- case ARM::VORRiv4i32:
- esize = ESize32;
- break;
- case ARM::VMOVv1i64:
- case ARM::VMOVv2i64:
- esize = ESize64;
- break;
- default:
- assert(0 && "Unexpected opcode!");
- return false;
- }
-
- // One register and a modified immediate value.
- // Add the imm operand.
- MI.addOperand(MCOperand::CreateImm(decodeN1VImm(insn, esize)));
-
- NumOpsAdded = 2;
-
- // VBIC/VORRiv*i* variants have an extra $src = $Vd to be filled in.
- if (NumOps >= 3 &&
- (OpInfo[2].RegClass == ARM::DPRRegClassID ||
- OpInfo[2].RegClass == ARM::QPRRegClassID)) {
- MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, OpInfo[0].RegClass,
- decodeNEONRd(insn))));
- NumOpsAdded += 1;
- }
-
- return true;
-}
-
-namespace {
-enum N2VFlag {
- N2V_None,
- N2V_VectorDupLane,
- N2V_VectorConvert_Between_Float_Fixed
-};
-} // End of unnamed namespace
-
-// Vector Convert [between floating-point and fixed-point]
-// Qd/Dd Qm/Dm [fbits]
-//
-// Vector Duplicate Lane (from scalar to all elements) Instructions.
-// VDUPLN16d, VDUPLN16q, VDUPLN32d, VDUPLN32q, VDUPLN8d, VDUPLN8q:
-// Qd/Dd Dm index
-//
-// Vector Move Long:
-// Qd Dm
-//
-// Vector Move Narrow:
-// Dd Qm
-//
-// Others
-static bool DisassembleNVdVmOptImm(MCInst &MI, unsigned Opc, uint32_t insn,
- unsigned short NumOps, unsigned &NumOpsAdded, N2VFlag Flag, BO B) {
-
- const MCInstrDesc &MCID = ARMInsts[Opc];
- const MCOperandInfo *OpInfo = MCID.OpInfo;
-
- assert(NumOps >= 2 &&
- (OpInfo[0].RegClass == ARM::DPRRegClassID ||
- OpInfo[0].RegClass == ARM::QPRRegClassID) &&
- (OpInfo[1].RegClass == ARM::DPRRegClassID ||
- OpInfo[1].RegClass == ARM::QPRRegClassID) &&
- "Expect >= 2 operands and first 2 as reg operands");
-
- unsigned &OpIdx = NumOpsAdded;
-
- OpIdx = 0;
-
- ElemSize esize = ESizeNA;
- if (Flag == N2V_VectorDupLane) {
- // VDUPLN has its index embedded. Its size can be inferred from the Opcode.
- assert(Opc >= ARM::VDUPLN16d && Opc <= ARM::VDUPLN8q &&
- "Unexpected Opcode");
- esize = (Opc == ARM::VDUPLN8d || Opc == ARM::VDUPLN8q) ? ESize8
- : ((Opc == ARM::VDUPLN16d || Opc == ARM::VDUPLN16q) ? ESize16
- : ESize32);
- }
-
- // Qd/Dd = Inst{22:15-12} => NEON Rd
- MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, OpInfo[OpIdx].RegClass,
- decodeNEONRd(insn))));
- ++OpIdx;
-
- // VPADAL...
- if (MCID.getOperandConstraint(OpIdx, MCOI::TIED_TO) != -1) {
- // TIED_TO operand.
- MI.addOperand(MCOperand::CreateReg(0));
- ++OpIdx;
- }
-
- // Dm = Inst{5:3-0} => NEON Rm
- MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, OpInfo[OpIdx].RegClass,
- decodeNEONRm(insn))));
- ++OpIdx;
-
- // VZIP and others have two TIED_TO reg operands.
- int Idx;
- while (OpIdx < NumOps &&
- (Idx = MCID.getOperandConstraint(OpIdx, MCOI::TIED_TO)) != -1) {
- // Add TIED_TO operand.
- MI.addOperand(MI.getOperand(Idx));
- ++OpIdx;
- }
-
- // Add the imm operand, if required.
- if (OpIdx < NumOps && OpInfo[OpIdx].RegClass < 0
- && !OpInfo[OpIdx].isPredicate() && !OpInfo[OpIdx].isOptionalDef()) {
-
- unsigned imm = 0xFFFFFFFF;
-
- if (Flag == N2V_VectorDupLane)
- imm = decodeNVLaneDupIndex(insn, esize);
- if (Flag == N2V_VectorConvert_Between_Float_Fixed)
- imm = decodeVCVTFractionBits(insn);
-
- assert(imm != 0xFFFFFFFF && "Internal error");
- MI.addOperand(MCOperand::CreateImm(imm));
- ++OpIdx;
- }
-
- return true;
-}
-
-static bool DisassembleN2RegFrm(MCInst &MI, unsigned Opc, uint32_t insn,
- unsigned short NumOps, unsigned &NumOpsAdded, BO B) {
-
- return DisassembleNVdVmOptImm(MI, Opc, insn, NumOps, NumOpsAdded,
- N2V_None, B);
-}
-static bool DisassembleNVCVTFrm(MCInst &MI, unsigned Opc, uint32_t insn,
- unsigned short NumOps, unsigned &NumOpsAdded, BO B) {
-
- return DisassembleNVdVmOptImm(MI, Opc, insn, NumOps, NumOpsAdded,
- N2V_VectorConvert_Between_Float_Fixed, B);
-}
-static bool DisassembleNVecDupLnFrm(MCInst &MI, unsigned Opc, uint32_t insn,
- unsigned short NumOps, unsigned &NumOpsAdded, BO B) {
-
- return DisassembleNVdVmOptImm(MI, Opc, insn, NumOps, NumOpsAdded,
- N2V_VectorDupLane, B);
-}
-
-// Vector Shift [Accumulate] Instructions.
-// Qd/Dd [Qd/Dd (TIED_TO)] Qm/Dm ShiftAmt
-//
-// Vector Shift Left Long (with maximum shift count) Instructions.
-// VSHLLi16, VSHLLi32, VSHLLi8: Qd Dm imm (== size)
-//
-static bool DisassembleNVectorShift(MCInst &MI, unsigned Opcode, uint32_t insn,
- unsigned short NumOps, unsigned &NumOpsAdded, bool LeftShift, BO B) {
-
- const MCInstrDesc &MCID = ARMInsts[Opcode];
- const MCOperandInfo *OpInfo = MCID.OpInfo;
-
- assert(NumOps >= 3 &&
- (OpInfo[0].RegClass == ARM::DPRRegClassID ||
- OpInfo[0].RegClass == ARM::QPRRegClassID) &&
- (OpInfo[1].RegClass == ARM::DPRRegClassID ||
- OpInfo[1].RegClass == ARM::QPRRegClassID) &&
- "Expect >= 3 operands and first 2 as reg operands");
-
- unsigned &OpIdx = NumOpsAdded;
-
- OpIdx = 0;
-
- // Qd/Dd = Inst{22:15-12} => NEON Rd
- MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, OpInfo[OpIdx].RegClass,
- decodeNEONRd(insn))));
- ++OpIdx;
-
- if (MCID.getOperandConstraint(OpIdx, MCOI::TIED_TO) != -1) {
- // TIED_TO operand.
- MI.addOperand(MCOperand::CreateReg(0));
- ++OpIdx;
- }
-
- assert((OpInfo[OpIdx].RegClass == ARM::DPRRegClassID ||
- OpInfo[OpIdx].RegClass == ARM::QPRRegClassID) &&
- "Reg operand expected");
-
- // Qm/Dm = Inst{5:3-0} => NEON Rm
- MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, OpInfo[OpIdx].RegClass,
- decodeNEONRm(insn))));
- ++OpIdx;
-
- assert(OpInfo[OpIdx].RegClass < 0 && "Imm operand expected");
-
- // Add the imm operand.
-
- // VSHLL has maximum shift count as the imm, inferred from its size.
- unsigned Imm;
- switch (Opcode) {
- default:
- Imm = decodeNVSAmt(insn, LeftShift);
- break;
- case ARM::VSHLLi8:
- Imm = 8;
- break;
- case ARM::VSHLLi16:
- Imm = 16;
- break;
- case ARM::VSHLLi32:
- Imm = 32;
- break;
- }
- MI.addOperand(MCOperand::CreateImm(Imm));
- ++OpIdx;
-
- return true;
-}
-
-// Left shift instructions.
-static bool DisassembleN2RegVecShLFrm(MCInst &MI, unsigned Opcode,
- uint32_t insn, unsigned short NumOps, unsigned &NumOpsAdded, BO B) {
-
- return DisassembleNVectorShift(MI, Opcode, insn, NumOps, NumOpsAdded, true,
- B);
-}
-// Right shift instructions have different shift amount interpretation.
-static bool DisassembleN2RegVecShRFrm(MCInst &MI, unsigned Opcode,
- uint32_t insn, unsigned short NumOps, unsigned &NumOpsAdded, BO B) {
-
- return DisassembleNVectorShift(MI, Opcode, insn, NumOps, NumOpsAdded, false,
- B);
-}
-
-namespace {
-enum N3VFlag {
- N3V_None,
- N3V_VectorExtract,
- N3V_VectorShift,
- N3V_Multiply_By_Scalar
-};
-} // End of unnamed namespace
-
-// NEON Three Register Instructions with Optional Immediate Operand
-//
-// Vector Extract Instructions.
-// Qd/Dd Qn/Dn Qm/Dm imm4
-//
-// Vector Shift (Register) Instructions.
-// Qd/Dd Qm/Dm Qn/Dn (notice the order of m, n)
-//
-// Vector Multiply [Accumulate/Subtract] [Long] By Scalar Instructions.
-// Qd/Dd Qn/Dn RestrictedDm index
-//
-// Others
-static bool DisassembleNVdVnVmOptImm(MCInst &MI, unsigned Opcode, uint32_t insn,
- unsigned short NumOps, unsigned &NumOpsAdded, N3VFlag Flag, BO B) {
-
- const MCInstrDesc &MCID = ARMInsts[Opcode];
- const MCOperandInfo *OpInfo = MCID.OpInfo;
-
- // No checking for OpInfo[2] because of MOVDneon/MOVQ with only two regs.
- assert(NumOps >= 3 &&
- (OpInfo[0].RegClass == ARM::DPRRegClassID ||
- OpInfo[0].RegClass == ARM::QPRRegClassID) &&
- (OpInfo[1].RegClass == ARM::DPRRegClassID ||
- OpInfo[1].RegClass == ARM::QPRRegClassID) &&
- "Expect >= 3 operands and first 2 as reg operands");
-
- unsigned &OpIdx = NumOpsAdded;
-
- OpIdx = 0;
-
- bool VdVnVm = Flag == N3V_VectorShift ? false : true;
- bool IsImm4 = Flag == N3V_VectorExtract ? true : false;
- bool IsDmRestricted = Flag == N3V_Multiply_By_Scalar ? true : false;
- ElemSize esize = ESizeNA;
- if (Flag == N3V_Multiply_By_Scalar) {
- unsigned size = (insn >> 20) & 3;
- if (size == 1) esize = ESize16;
- if (size == 2) esize = ESize32;
- assert (esize == ESize16 || esize == ESize32);
- }
-
- // Qd/Dd = Inst{22:15-12} => NEON Rd
- MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, OpInfo[OpIdx].RegClass,
- decodeNEONRd(insn))));
- ++OpIdx;
-
- // VABA, VABAL, VBSLd, VBSLq, ...
- if (MCID.getOperandConstraint(OpIdx, MCOI::TIED_TO) != -1) {
- // TIED_TO operand.
- MI.addOperand(MCOperand::CreateReg(0));
- ++OpIdx;
- }
-
- // Dn = Inst{7:19-16} => NEON Rn
- // or
- // Dm = Inst{5:3-0} => NEON Rm
- MI.addOperand(MCOperand::CreateReg(
- getRegisterEnum(B, OpInfo[OpIdx].RegClass,
- VdVnVm ? decodeNEONRn(insn)
- : decodeNEONRm(insn))));
- ++OpIdx;
-
- // Dm = Inst{5:3-0} => NEON Rm
- // or
- // Dm is restricted to D0-D7 if size is 16, D0-D15 otherwise
- // or
- // Dn = Inst{7:19-16} => NEON Rn
- unsigned m = VdVnVm ? (IsDmRestricted ? decodeRestrictedDm(insn, esize)
- : decodeNEONRm(insn))
- : decodeNEONRn(insn);
-
- MI.addOperand(MCOperand::CreateReg(
- getRegisterEnum(B, OpInfo[OpIdx].RegClass, m)));
- ++OpIdx;
-
- if (OpIdx < NumOps && OpInfo[OpIdx].RegClass < 0
- && !OpInfo[OpIdx].isPredicate() && !OpInfo[OpIdx].isOptionalDef()) {
- // Add the imm operand.
- unsigned Imm = 0;
- if (IsImm4)
- Imm = decodeN3VImm(insn);
- else if (IsDmRestricted)
- Imm = decodeRestrictedDmIndex(insn, esize);
- else {
- assert(0 && "Internal error: unreachable code!");
- return false;
- }
-
- MI.addOperand(MCOperand::CreateImm(Imm));
- ++OpIdx;
- }
-
- return true;
-}
-
-static bool DisassembleN3RegFrm(MCInst &MI, unsigned Opcode, uint32_t insn,
- unsigned short NumOps, unsigned &NumOpsAdded, BO B) {
-
- return DisassembleNVdVnVmOptImm(MI, Opcode, insn, NumOps, NumOpsAdded,
- N3V_None, B);
-}
-static bool DisassembleN3RegVecShFrm(MCInst &MI, unsigned Opcode,
- uint32_t insn, unsigned short NumOps, unsigned &NumOpsAdded, BO B) {
-
- return DisassembleNVdVnVmOptImm(MI, Opcode, insn, NumOps, NumOpsAdded,
- N3V_VectorShift, B);
-}
-static bool DisassembleNVecExtractFrm(MCInst &MI, unsigned Opcode,
- uint32_t insn, unsigned short NumOps, unsigned &NumOpsAdded, BO B) {
-
- return DisassembleNVdVnVmOptImm(MI, Opcode, insn, NumOps, NumOpsAdded,
- N3V_VectorExtract, B);
-}
-static bool DisassembleNVecMulScalarFrm(MCInst &MI, unsigned Opcode,
- uint32_t insn, unsigned short NumOps, unsigned &NumOpsAdded, BO B) {
-
- return DisassembleNVdVnVmOptImm(MI, Opcode, insn, NumOps, NumOpsAdded,
- N3V_Multiply_By_Scalar, B);
-}
-
-// Vector Table Lookup
-//
-// VTBL1, VTBX1: Dd [Dd(TIED_TO)] Dn Dm
-// VTBL2, VTBX2: Dd [Dd(TIED_TO)] Dn Dn+1 Dm
-// VTBL3, VTBX3: Dd [Dd(TIED_TO)] Dn Dn+1 Dn+2 Dm
-// VTBL4, VTBX4: Dd [Dd(TIED_TO)] Dn Dn+1 Dn+2 Dn+3 Dm
-static bool DisassembleNVTBLFrm(MCInst &MI, unsigned Opcode, uint32_t insn,
- unsigned short NumOps, unsigned &NumOpsAdded, BO B) {
-
- const MCInstrDesc &MCID = ARMInsts[Opcode];
- const MCOperandInfo *OpInfo = MCID.OpInfo;
- if (!OpInfo) return false;
-
- assert(NumOps >= 3 &&
- OpInfo[0].RegClass == ARM::DPRRegClassID &&
- OpInfo[1].RegClass == ARM::DPRRegClassID &&
- OpInfo[2].RegClass == ARM::DPRRegClassID &&
- "Expect >= 3 operands and first 3 as reg operands");
-
- unsigned &OpIdx = NumOpsAdded;
-
- OpIdx = 0;
-
- unsigned Rn = decodeNEONRn(insn);
-
- // {Dn} encoded as len = 0b00
- // {Dn Dn+1} encoded as len = 0b01
- // {Dn Dn+1 Dn+2 } encoded as len = 0b10
- // {Dn Dn+1 Dn+2 Dn+3} encoded as len = 0b11
- unsigned Len = slice(insn, 9, 8) + 1;
-
- // Dd (the destination vector)
- MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::DPRRegClassID,
- decodeNEONRd(insn))));
- ++OpIdx;
-
- // Process tied_to operand constraint.
- int Idx;
- if ((Idx = MCID.getOperandConstraint(OpIdx, MCOI::TIED_TO)) != -1) {
- MI.addOperand(MI.getOperand(Idx));
- ++OpIdx;
- }
-
- // Do the <list> now.
- for (unsigned i = 0; i < Len; ++i) {
- assert(OpIdx < NumOps && OpInfo[OpIdx].RegClass == ARM::DPRRegClassID &&
- "Reg operand expected");
- MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::DPRRegClassID,
- Rn + i)));
- ++OpIdx;
- }
-
- // Dm (the index vector)
- assert(OpIdx < NumOps && OpInfo[OpIdx].RegClass == ARM::DPRRegClassID &&
- "Reg operand (index vector) expected");
- MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::DPRRegClassID,
- decodeNEONRm(insn))));
- ++OpIdx;
-
- return true;
-}
-
-// Vector Get Lane (move scalar to ARM core register) Instructions.
-// VGETLNi32, VGETLNs16, VGETLNs8, VGETLNu16, VGETLNu8: Rt Dn index
-static bool DisassembleNGetLnFrm(MCInst &MI, unsigned Opcode, uint32_t insn,
- unsigned short NumOps, unsigned &NumOpsAdded, BO B) {
-
- const MCInstrDesc &MCID = ARMInsts[Opcode];
- const MCOperandInfo *OpInfo = MCID.OpInfo;
- if (!OpInfo) return false;
-
- assert(MCID.getNumDefs() == 1 && NumOps >= 3 &&
- OpInfo[0].RegClass == ARM::GPRRegClassID &&
- OpInfo[1].RegClass == ARM::DPRRegClassID &&
- OpInfo[2].RegClass < 0 &&
- "Expect >= 3 operands with one dst operand");
-
- ElemSize esize =
- Opcode == ARM::VGETLNi32 ? ESize32
- : ((Opcode == ARM::VGETLNs16 || Opcode == ARM::VGETLNu16) ? ESize16
- : ESize8);
-
- // Rt = Inst{15-12} => ARM Rd
- MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID,
- decodeRd(insn))));
-
- // Dn = Inst{7:19-16} => NEON Rn
- MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::DPRRegClassID,
- decodeNEONRn(insn))));
-
- MI.addOperand(MCOperand::CreateImm(decodeNVLaneOpIndex(insn, esize)));
-
- NumOpsAdded = 3;
- return true;
-}
-
-// Vector Set Lane (move ARM core register to scalar) Instructions.
-// VSETLNi16, VSETLNi32, VSETLNi8: Dd Dd (TIED_TO) Rt index
-static bool DisassembleNSetLnFrm(MCInst &MI, unsigned Opcode, uint32_t insn,
- unsigned short NumOps, unsigned &NumOpsAdded, BO B) {
-
- const MCInstrDesc &MCID = ARMInsts[Opcode];
- const MCOperandInfo *OpInfo = MCID.OpInfo;
- if (!OpInfo) return false;
-
- assert(MCID.getNumDefs() == 1 && NumOps >= 3 &&
- OpInfo[0].RegClass == ARM::DPRRegClassID &&
- OpInfo[1].RegClass == ARM::DPRRegClassID &&
- MCID.getOperandConstraint(1, MCOI::TIED_TO) != -1 &&
- OpInfo[2].RegClass == ARM::GPRRegClassID &&
- OpInfo[3].RegClass < 0 &&
- "Expect >= 3 operands with one dst operand");
-
- ElemSize esize =
- Opcode == ARM::VSETLNi8 ? ESize8
- : (Opcode == ARM::VSETLNi16 ? ESize16
- : ESize32);
-
- // Dd = Inst{7:19-16} => NEON Rn
- MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::DPRRegClassID,
- decodeNEONRn(insn))));
-
- // TIED_TO operand.
- MI.addOperand(MCOperand::CreateReg(0));
-
- // Rt = Inst{15-12} => ARM Rd
- MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID,
- decodeRd(insn))));
-
- MI.addOperand(MCOperand::CreateImm(decodeNVLaneOpIndex(insn, esize)));
-
- NumOpsAdded = 4;
- return true;
-}
-
-// Vector Duplicate Instructions (from ARM core register to all elements).
-// VDUP8d, VDUP16d, VDUP32d, VDUP8q, VDUP16q, VDUP32q: Qd/Dd Rt
-static bool DisassembleNDupFrm(MCInst &MI, unsigned Opcode, uint32_t insn,
- unsigned short NumOps, unsigned &NumOpsAdded, BO B) {
-
- const MCOperandInfo *OpInfo = ARMInsts[Opcode].OpInfo;
-
- assert(NumOps >= 2 &&
- (OpInfo[0].RegClass == ARM::DPRRegClassID ||
- OpInfo[0].RegClass == ARM::QPRRegClassID) &&
- OpInfo[1].RegClass == ARM::GPRRegClassID &&
- "Expect >= 2 operands and first 2 as reg operand");
-
- unsigned RegClass = OpInfo[0].RegClass;
-
- // Qd/Dd = Inst{7:19-16} => NEON Rn
- MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, RegClass,
- decodeNEONRn(insn))));
-
- // Rt = Inst{15-12} => ARM Rd
- MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID,
- decodeRd(insn))));
-
- NumOpsAdded = 2;
- return true;
-}
-
-static inline bool PreLoadOpcode(unsigned Opcode) {
- switch(Opcode) {
- case ARM::PLDi12: case ARM::PLDrs:
- case ARM::PLDWi12: case ARM::PLDWrs:
- case ARM::PLIi12: case ARM::PLIrs:
- return true;
- default:
- return false;
- }
-}
-
-static bool DisassemblePreLoadFrm(MCInst &MI, unsigned Opcode, uint32_t insn,
- unsigned short NumOps, unsigned &NumOpsAdded, BO B) {
-
- // Preload Data/Instruction requires either 2 or 3 operands.
- // PLDi12, PLDWi12, PLIi12: addrmode_imm12
- // PLDrs, PLDWrs, PLIrs: ldst_so_reg
-
- MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID,
- decodeRn(insn))));
-
- if (Opcode == ARM::PLDi12 || Opcode == ARM::PLDWi12
- || Opcode == ARM::PLIi12) {
- unsigned Imm12 = slice(insn, 11, 0);
- bool Negative = getUBit(insn) == 0;
-
- // A8.6.118 PLD (literal) PLDWi12 with Rn=PC is transformed to PLDi12.
- if (Opcode == ARM::PLDWi12 && slice(insn, 19, 16) == 0xF) {
- DEBUG(errs() << "Rn == '1111': PLDWi12 morphed to PLDi12\n");
- MI.setOpcode(ARM::PLDi12);
- }
-
- // -0 is represented specially. All other values are as normal.
- int Offset = Negative ? -1 * Imm12 : Imm12;
- if (Imm12 == 0 && Negative)
- Offset = INT32_MIN;
-
- MI.addOperand(MCOperand::CreateImm(Offset));
- NumOpsAdded = 2;
- } else {
- MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID,
- decodeRm(insn))));
-
- ARM_AM::AddrOpc AddrOpcode = getUBit(insn) ? ARM_AM::add : ARM_AM::sub;
-
- // Inst{6-5} encodes the shift opcode.
- ARM_AM::ShiftOpc ShOp = getShiftOpcForBits(slice(insn, 6, 5));
- // Inst{11-7} encodes the imm5 shift amount.
- unsigned ShImm = slice(insn, 11, 7);
-
- // A8.4.1. Possible rrx or shift amount of 32...
- getImmShiftSE(ShOp, ShImm);
- MI.addOperand(MCOperand::CreateImm(
- ARM_AM::getAM2Opc(AddrOpcode, ShImm, ShOp)));
- NumOpsAdded = 3;
- }
-
- return true;
-}
-
-static bool DisassembleMiscFrm(MCInst &MI, unsigned Opcode, uint32_t insn,
- unsigned short NumOps, unsigned &NumOpsAdded, BO B) {
-
- if (Opcode == ARM::DMB || Opcode == ARM::DSB || Opcode == ARM::ISB) {
- // Inst{3-0} encodes the memory barrier option for the variants.
- unsigned opt = slice(insn, 3, 0);
- switch (opt) {
- case ARM_MB::SY: case ARM_MB::ST:
- case ARM_MB::ISH: case ARM_MB::ISHST:
- case ARM_MB::NSH: case ARM_MB::NSHST:
- case ARM_MB::OSH: case ARM_MB::OSHST:
- MI.addOperand(MCOperand::CreateImm(opt));
- NumOpsAdded = 1;
- return true;
- default:
- return false;
- }
- }
-
- switch (Opcode) {
- case ARM::CLREX:
- case ARM::NOP:
- case ARM::TRAP:
- case ARM::YIELD:
- case ARM::WFE:
- case ARM::WFI:
- case ARM::SEV:
- return true;
- case ARM::SWP:
- case ARM::SWPB:
- // SWP, SWPB: Rd Rm Rn
- // Delegate to DisassembleLdStExFrm()....
- return DisassembleLdStExFrm(MI, Opcode, insn, NumOps, NumOpsAdded, B);
- default:
- break;
- }
-
- if (Opcode == ARM::SETEND) {
- NumOpsAdded = 1;
- MI.addOperand(MCOperand::CreateImm(slice(insn, 9, 9)));
- return true;
- }
-
- // FIXME: To enable correct asm parsing and disasm of CPS we need 3 different
- // opcodes which match the same real instruction. This is needed since there's
- // no current handling of optional arguments. Fix here when a better handling
- // of optional arguments is implemented.
- if (Opcode == ARM::CPS3p) { // M = 1
- // Let's reject these impossible imod values by returning false:
- // 1. (imod=0b01)
- //
- // AsmPrinter cannot handle imod=0b00, plus (imod=0b00,M=1,iflags!=0) is an
- // invalid combination, so we just check for imod=0b00 here.
- if (slice(insn, 19, 18) == 0 || slice(insn, 19, 18) == 1)
- return false;
- MI.addOperand(MCOperand::CreateImm(slice(insn, 19, 18))); // imod
- MI.addOperand(MCOperand::CreateImm(slice(insn, 8, 6))); // iflags
- MI.addOperand(MCOperand::CreateImm(slice(insn, 4, 0))); // mode
- NumOpsAdded = 3;
- return true;
- }
- if (Opcode == ARM::CPS2p) { // mode = 0, M = 0
- // Let's reject these impossible imod values by returning false:
- // 1. (imod=0b00,M=0)
- // 2. (imod=0b01)
- if (slice(insn, 19, 18) == 0 || slice(insn, 19, 18) == 1)
- return false;
- MI.addOperand(MCOperand::CreateImm(slice(insn, 19, 18))); // imod
- MI.addOperand(MCOperand::CreateImm(slice(insn, 8, 6))); // iflags
- NumOpsAdded = 2;
- return true;
- }
- if (Opcode == ARM::CPS1p) { // imod = 0, iflags = 0, M = 1
- MI.addOperand(MCOperand::CreateImm(slice(insn, 4, 0))); // mode
- NumOpsAdded = 1;
- return true;
- }
-
- // DBG has its option specified in Inst{3-0}.
- if (Opcode == ARM::DBG) {
- MI.addOperand(MCOperand::CreateImm(slice(insn, 3, 0)));
- NumOpsAdded = 1;
- return true;
- }
-
- // BKPT takes an imm32 val equal to ZeroExtend(Inst{19-8:3-0}).
- if (Opcode == ARM::BKPT) {
- MI.addOperand(MCOperand::CreateImm(slice(insn, 19, 8) << 4 |
- slice(insn, 3, 0)));
- NumOpsAdded = 1;
- return true;
- }
-
- if (PreLoadOpcode(Opcode))
- return DisassemblePreLoadFrm(MI, Opcode, insn, NumOps, NumOpsAdded, B);
-
- assert(0 && "Unexpected misc instruction!");
- return false;
-}
-
-/// FuncPtrs - FuncPtrs maps ARMFormat to its corresponding DisassembleFP.
-/// We divide the disassembly task into different categories, with each one
-/// corresponding to a specific instruction encoding format. There could be
-/// exceptions when handling a specific format, and that is why the Opcode is
-/// also present in the function prototype.
-static const DisassembleFP FuncPtrs[] = {
- &DisassemblePseudo,
- &DisassembleMulFrm,
- &DisassembleBrFrm,
- &DisassembleBrMiscFrm,
- &DisassembleDPFrm,
- &DisassembleDPSoRegFrm,
- &DisassembleLdFrm,
- &DisassembleStFrm,
- &DisassembleLdMiscFrm,
- &DisassembleStMiscFrm,
- &DisassembleLdStMulFrm,
- &DisassembleLdStExFrm,
- &DisassembleArithMiscFrm,
- &DisassembleSatFrm,
- &DisassembleExtFrm,
- &DisassembleVFPUnaryFrm,
- &DisassembleVFPBinaryFrm,
- &DisassembleVFPConv1Frm,
- &DisassembleVFPConv2Frm,
- &DisassembleVFPConv3Frm,
- &DisassembleVFPConv4Frm,
- &DisassembleVFPConv5Frm,
- &DisassembleVFPLdStFrm,
- &DisassembleVFPLdStMulFrm,
- &DisassembleVFPMiscFrm,
- &DisassembleThumbFrm,
- &DisassembleMiscFrm,
- &DisassembleNGetLnFrm,
- &DisassembleNSetLnFrm,
- &DisassembleNDupFrm,
-
- // VLD and VST (including one lane) Instructions.
- &DisassembleNLdSt,
-
- // A7.4.6 One register and a modified immediate value
- // 1-Register Instructions with imm.
- // LLVM only defines VMOVv instructions.
- &DisassembleN1RegModImmFrm,
-
- // 2-Register Instructions with no imm.
- &DisassembleN2RegFrm,
-
- // 2-Register Instructions with imm (vector convert float/fixed point).
- &DisassembleNVCVTFrm,
-
- // 2-Register Instructions with imm (vector dup lane).
- &DisassembleNVecDupLnFrm,
-
- // Vector Shift Left Instructions.
- &DisassembleN2RegVecShLFrm,
-
- // Vector Shift Righ Instructions, which has different interpretation of the
- // shift amount from the imm6 field.
- &DisassembleN2RegVecShRFrm,
-
- // 3-Register Data-Processing Instructions.
- &DisassembleN3RegFrm,
-
- // Vector Shift (Register) Instructions.
- // D:Vd M:Vm N:Vn (notice that M:Vm is the first operand)
- &DisassembleN3RegVecShFrm,
-
- // Vector Extract Instructions.
- &DisassembleNVecExtractFrm,
-
- // Vector [Saturating Rounding Doubling] Multiply [Accumulate/Subtract] [Long]
- // By Scalar Instructions.
- &DisassembleNVecMulScalarFrm,
-
- // Vector Table Lookup uses byte indexes in a control vector to look up byte
- // values in a table and generate a new vector.
- &DisassembleNVTBLFrm,
-
- NULL
-};
-
-/// BuildIt - BuildIt performs the build step for this ARM Basic MC Builder.
-/// The general idea is to set the Opcode for the MCInst, followed by adding
-/// the appropriate MCOperands to the MCInst. ARM Basic MC Builder delegates
-/// to the Format-specific disassemble function for disassembly, followed by
-/// TryPredicateAndSBitModifier() to do PredicateOperand and OptionalDefOperand
-/// which follow the Dst/Src Operands.
-bool ARMBasicMCBuilder::BuildIt(MCInst &MI, uint32_t insn) {
- // Stage 1 sets the Opcode.
- MI.setOpcode(Opcode);
- // If the number of operands is zero, we're done!
- if (NumOps == 0)
- return true;
-
- // Stage 2 calls the format-specific disassemble function to build the operand
- // list.
- if (Disasm == NULL)
- return false;
- unsigned NumOpsAdded = 0;
- bool OK = (*Disasm)(MI, Opcode, insn, NumOps, NumOpsAdded, this);
-
- if (!OK || this->Err != 0) return false;
- if (NumOpsAdded >= NumOps)
- return true;
-
- // Stage 3 deals with operands unaccounted for after stage 2 is finished.
- // FIXME: Should this be done selectively?
- return TryPredicateAndSBitModifier(MI, Opcode, insn, NumOps - NumOpsAdded);
-}
-
-// A8.3 Conditional execution
-// A8.3.1 Pseudocode details of conditional execution
-// Condition bits '111x' indicate the instruction is always executed.
-static uint32_t CondCode(uint32_t CondField) {
- if (CondField == 0xF)
- return ARMCC::AL;
- return CondField;
-}
-
-/// DoPredicateOperands - DoPredicateOperands process the predicate operands
-/// of some Thumb instructions which come before the reglist operands. It
-/// returns true if the two predicate operands have been processed.
-bool ARMBasicMCBuilder::DoPredicateOperands(MCInst& MI, unsigned Opcode,
- uint32_t /* insn */, unsigned short NumOpsRemaining) {
-
- assert(NumOpsRemaining > 0 && "Invalid argument");
-
- const MCOperandInfo *OpInfo = ARMInsts[Opcode].OpInfo;
- unsigned Idx = MI.getNumOperands();
-
- // First, we check whether this instr specifies the PredicateOperand through
- // a pair of MCOperandInfos with isPredicate() property.
- if (NumOpsRemaining >= 2 &&
- OpInfo[Idx].isPredicate() && OpInfo[Idx+1].isPredicate() &&
- OpInfo[Idx].RegClass < 0 &&
- OpInfo[Idx+1].RegClass == ARM::CCRRegClassID)
- {
- // If we are inside an IT block, get the IT condition bits maintained via
- // ARMBasicMCBuilder::ITState[7:0], through ARMBasicMCBuilder::GetITCond().
- // See also A2.5.2.
- if (InITBlock())
- MI.addOperand(MCOperand::CreateImm(GetITCond()));
- else
- MI.addOperand(MCOperand::CreateImm(ARMCC::AL));
- MI.addOperand(MCOperand::CreateReg(ARM::CPSR));
- return true;
- }
-
- return false;
-}
-
-/// TryPredicateAndSBitModifier - TryPredicateAndSBitModifier tries to process
-/// the possible Predicate and SBitModifier, to build the remaining MCOperand
-/// constituents.
-bool ARMBasicMCBuilder::TryPredicateAndSBitModifier(MCInst& MI, unsigned Opcode,
- uint32_t insn, unsigned short NumOpsRemaining) {
-
- assert(NumOpsRemaining > 0 && "Invalid argument");
-
- const MCOperandInfo *OpInfo = ARMInsts[Opcode].OpInfo;
- const std::string &Name = ARMInsts[Opcode].Name;
- unsigned Idx = MI.getNumOperands();
- uint64_t TSFlags = ARMInsts[Opcode].TSFlags;
-
- // First, we check whether this instr specifies the PredicateOperand through
- // a pair of MCOperandInfos with isPredicate() property.
- if (NumOpsRemaining >= 2 &&
- OpInfo[Idx].isPredicate() && OpInfo[Idx+1].isPredicate() &&
- OpInfo[Idx].RegClass < 0 &&
- OpInfo[Idx+1].RegClass == ARM::CCRRegClassID)
- {
- // If we are inside an IT block, get the IT condition bits maintained via
- // ARMBasicMCBuilder::ITState[7:0], through ARMBasicMCBuilder::GetITCond().
- // See also A2.5.2.
- if (InITBlock())
- MI.addOperand(MCOperand::CreateImm(GetITCond()));
- else {
- if (Name.length() > 1 && Name[0] == 't') {
- // Thumb conditional branch instructions have their cond field embedded,
- // like ARM.
- //
- // A8.6.16 B
- // Check for undefined encodings.
- unsigned cond;
- if (Name == "t2Bcc") {
- if ((cond = slice(insn, 25, 22)) >= 14)
- return false;
- MI.addOperand(MCOperand::CreateImm(CondCode(cond)));
- } else if (Name == "tBcc") {
- if ((cond = slice(insn, 11, 8)) == 14)
- return false;
- MI.addOperand(MCOperand::CreateImm(CondCode(cond)));
- } else
- MI.addOperand(MCOperand::CreateImm(ARMCC::AL));
- } else {
- // ARM instructions get their condition field from Inst{31-28}.
- // We should reject Inst{31-28} = 0b1111 as invalid encoding.
- if (!isNEONDomain(TSFlags) && getCondField(insn) == 0xF)
- return false;
- MI.addOperand(MCOperand::CreateImm(CondCode(getCondField(insn))));
- }
- }
- MI.addOperand(MCOperand::CreateReg(ARM::CPSR));
- Idx += 2;
- NumOpsRemaining -= 2;
- }
-
- if (NumOpsRemaining == 0)
- return true;
-
- // Next, if OptionalDefOperand exists, we check whether the 'S' bit is set.
- if (OpInfo[Idx].isOptionalDef() && OpInfo[Idx].RegClass==ARM::CCRRegClassID) {
- MI.addOperand(MCOperand::CreateReg(getSBit(insn) == 1 ? ARM::CPSR : 0));
- --NumOpsRemaining;
- }
-
- if (NumOpsRemaining == 0)
- return true;
- else
- return false;
-}
-
-/// RunBuildAfterHook - RunBuildAfterHook performs operations deemed necessary
-/// after BuildIt is finished.
-bool ARMBasicMCBuilder::RunBuildAfterHook(bool Status, MCInst &MI,
- uint32_t insn) {
-
- if (!SP) return Status;
-
- if (Opcode == ARM::t2IT)
- Status = SP->InitIT(slice(insn, 7, 0)) ? Status : false;
- else if (InITBlock())
- SP->UpdateIT();
-
- return Status;
-}
-
-/// Opcode, Format, and NumOperands make up an ARM Basic MCBuilder.
-ARMBasicMCBuilder::ARMBasicMCBuilder(unsigned opc, ARMFormat format,
- unsigned short num)
- : Opcode(opc), Format(format), NumOps(num), SP(0), Err(0) {
- unsigned Idx = (unsigned)format;
- assert(Idx < (array_lengthof(FuncPtrs) - 1) && "Unknown format");
- Disasm = FuncPtrs[Idx];
-}
-
-/// CreateMCBuilder - Return an ARMBasicMCBuilder that can build up the MC
-/// infrastructure of an MCInst given the Opcode and Format of the instr.
-/// Return NULL if it fails to create/return a proper builder. API clients
-/// are responsible for freeing up of the allocated memory. Cacheing can be
-/// performed by the API clients to improve performance.
-ARMBasicMCBuilder *llvm::CreateMCBuilder(unsigned Opcode, ARMFormat Format) {
- // For "Unknown format", fail by returning a NULL pointer.
- if ((unsigned)Format >= (array_lengthof(FuncPtrs) - 1)) {
- DEBUG(errs() << "Unknown format\n");
- return 0;
- }
-
- return new ARMBasicMCBuilder(Opcode, Format,
- ARMInsts[Opcode].getNumOperands());
-}
-
-/// tryAddingSymbolicOperand - tryAddingSymbolicOperand trys to add a symbolic
-/// operand in place of the immediate Value in the MCInst. The immediate
-/// Value has had any PC adjustment made by the caller. If the getOpInfo()
-/// function was set as part of the setupBuilderForSymbolicDisassembly() call
-/// then that function is called to get any symbolic information at the
-/// builder's Address for this instrution. If that returns non-zero then the
-/// symbolic information it returns is used to create an MCExpr and that is
-/// added as an operand to the MCInst. This function returns true if it adds
-/// an operand to the MCInst and false otherwise.
-bool ARMBasicMCBuilder::tryAddingSymbolicOperand(uint64_t Value,
- uint64_t InstSize,
- MCInst &MI) {
- if (!GetOpInfo)
- return false;
-
- struct LLVMOpInfo1 SymbolicOp;
- SymbolicOp.Value = Value;
- if (!GetOpInfo(DisInfo, Address, 0 /* Offset */, InstSize, 1, &SymbolicOp))
- return false;
-
- const MCExpr *Add = NULL;
- if (SymbolicOp.AddSymbol.Present) {
- if (SymbolicOp.AddSymbol.Name) {
- StringRef Name(SymbolicOp.AddSymbol.Name);
- MCSymbol *Sym = Ctx->GetOrCreateSymbol(Name);
- Add = MCSymbolRefExpr::Create(Sym, *Ctx);
- } else {
- Add = MCConstantExpr::Create(SymbolicOp.AddSymbol.Value, *Ctx);
- }
- }
-
- const MCExpr *Sub = NULL;
- if (SymbolicOp.SubtractSymbol.Present) {
- if (SymbolicOp.SubtractSymbol.Name) {
- StringRef Name(SymbolicOp.SubtractSymbol.Name);
- MCSymbol *Sym = Ctx->GetOrCreateSymbol(Name);
- Sub = MCSymbolRefExpr::Create(Sym, *Ctx);
- } else {
- Sub = MCConstantExpr::Create(SymbolicOp.SubtractSymbol.Value, *Ctx);
- }
- }
-
- const MCExpr *Off = NULL;
- if (SymbolicOp.Value != 0)
- Off = MCConstantExpr::Create(SymbolicOp.Value, *Ctx);
-
- const MCExpr *Expr;
- if (Sub) {
- const MCExpr *LHS;
- if (Add)
- LHS = MCBinaryExpr::CreateSub(Add, Sub, *Ctx);
- else
- LHS = MCUnaryExpr::CreateMinus(Sub, *Ctx);
- if (Off != 0)
- Expr = MCBinaryExpr::CreateAdd(LHS, Off, *Ctx);
- else
- Expr = LHS;
- } else if (Add) {
- if (Off != 0)
- Expr = MCBinaryExpr::CreateAdd(Add, Off, *Ctx);
- else
- Expr = Add;
- } else {
- if (Off != 0)
- Expr = Off;
- else
- Expr = MCConstantExpr::Create(0, *Ctx);
- }
-
- if (SymbolicOp.VariantKind == LLVMDisassembler_VariantKind_ARM_HI16)
- MI.addOperand(MCOperand::CreateExpr(ARMMCExpr::CreateUpper16(Expr, *Ctx)));
- else if (SymbolicOp.VariantKind == LLVMDisassembler_VariantKind_ARM_LO16)
- MI.addOperand(MCOperand::CreateExpr(ARMMCExpr::CreateLower16(Expr, *Ctx)));
- else if (SymbolicOp.VariantKind == LLVMDisassembler_VariantKind_None)
- MI.addOperand(MCOperand::CreateExpr(Expr));
- else
- assert("bad SymbolicOp.VariantKind");
-
- return true;
-}
diff --git a/contrib/llvm/lib/Target/ARM/Disassembler/ARMDisassemblerCore.h b/contrib/llvm/lib/Target/ARM/Disassembler/ARMDisassemblerCore.h
deleted file mode 100644
index a7ba141..0000000
--- a/contrib/llvm/lib/Target/ARM/Disassembler/ARMDisassemblerCore.h
+++ /dev/null
@@ -1,336 +0,0 @@
-//===- ARMDisassemblerCore.h - ARM disassembler helpers ---------*- C++ -*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file is part of the ARM Disassembler.
-//
-// The first part defines the enumeration type of ARM instruction format, which
-// specifies the encoding used by the instruction, as well as a helper function
-// to convert the enums to printable char strings.
-//
-// It also contains code to represent the concepts of Builder and DisassembleFP
-// to solve the problem of disassembling an ARM instr.
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef ARMDISASSEMBLERCORE_H
-#define ARMDISASSEMBLERCORE_H
-
-#include "llvm/MC/MCInst.h"
-#include "llvm/MC/MCSymbol.h"
-#include "llvm/MC/MCExpr.h"
-#include "llvm/MC/MCContext.h"
-#include "llvm/Target/TargetInstrInfo.h"
-#include "llvm-c/Disassembler.h"
-#include "ARMBaseInstrInfo.h"
-#include "ARMRegisterInfo.h"
-#include "ARMDisassembler.h"
-
-namespace llvm {
-class MCContext;
-
-class ARMUtils {
-public:
- static const char *OpcodeName(unsigned Opcode);
-};
-
-/////////////////////////////////////////////////////
-// //
-// Enums and Utilities for ARM Instruction Format //
-// //
-/////////////////////////////////////////////////////
-
-#define ARM_FORMATS \
- ENTRY(ARM_FORMAT_PSEUDO, 0) \
- ENTRY(ARM_FORMAT_MULFRM, 1) \
- ENTRY(ARM_FORMAT_BRFRM, 2) \
- ENTRY(ARM_FORMAT_BRMISCFRM, 3) \
- ENTRY(ARM_FORMAT_DPFRM, 4) \
- ENTRY(ARM_FORMAT_DPSOREGFRM, 5) \
- ENTRY(ARM_FORMAT_LDFRM, 6) \
- ENTRY(ARM_FORMAT_STFRM, 7) \
- ENTRY(ARM_FORMAT_LDMISCFRM, 8) \
- ENTRY(ARM_FORMAT_STMISCFRM, 9) \
- ENTRY(ARM_FORMAT_LDSTMULFRM, 10) \
- ENTRY(ARM_FORMAT_LDSTEXFRM, 11) \
- ENTRY(ARM_FORMAT_ARITHMISCFRM, 12) \
- ENTRY(ARM_FORMAT_SATFRM, 13) \
- ENTRY(ARM_FORMAT_EXTFRM, 14) \
- ENTRY(ARM_FORMAT_VFPUNARYFRM, 15) \
- ENTRY(ARM_FORMAT_VFPBINARYFRM, 16) \
- ENTRY(ARM_FORMAT_VFPCONV1FRM, 17) \
- ENTRY(ARM_FORMAT_VFPCONV2FRM, 18) \
- ENTRY(ARM_FORMAT_VFPCONV3FRM, 19) \
- ENTRY(ARM_FORMAT_VFPCONV4FRM, 20) \
- ENTRY(ARM_FORMAT_VFPCONV5FRM, 21) \
- ENTRY(ARM_FORMAT_VFPLDSTFRM, 22) \
- ENTRY(ARM_FORMAT_VFPLDSTMULFRM, 23) \
- ENTRY(ARM_FORMAT_VFPMISCFRM, 24) \
- ENTRY(ARM_FORMAT_THUMBFRM, 25) \
- ENTRY(ARM_FORMAT_MISCFRM, 26) \
- ENTRY(ARM_FORMAT_NEONGETLNFRM, 27) \
- ENTRY(ARM_FORMAT_NEONSETLNFRM, 28) \
- ENTRY(ARM_FORMAT_NEONDUPFRM, 29) \
- ENTRY(ARM_FORMAT_NLdSt, 30) \
- ENTRY(ARM_FORMAT_N1RegModImm, 31) \
- ENTRY(ARM_FORMAT_N2Reg, 32) \
- ENTRY(ARM_FORMAT_NVCVT, 33) \
- ENTRY(ARM_FORMAT_NVecDupLn, 34) \
- ENTRY(ARM_FORMAT_N2RegVecShL, 35) \
- ENTRY(ARM_FORMAT_N2RegVecShR, 36) \
- ENTRY(ARM_FORMAT_N3Reg, 37) \
- ENTRY(ARM_FORMAT_N3RegVecSh, 38) \
- ENTRY(ARM_FORMAT_NVecExtract, 39) \
- ENTRY(ARM_FORMAT_NVecMulScalar, 40) \
- ENTRY(ARM_FORMAT_NVTBL, 41)
-
-// ARM instruction format specifies the encoding used by the instruction.
-#define ENTRY(n, v) n = v,
-typedef enum {
- ARM_FORMATS
- ARM_FORMAT_NA
-} ARMFormat;
-#undef ENTRY
-
-// Converts enum to const char*.
-static const inline char *stringForARMFormat(ARMFormat form) {
-#define ENTRY(n, v) case n: return #n;
- switch(form) {
- ARM_FORMATS
- case ARM_FORMAT_NA:
- default:
- return "";
- }
-#undef ENTRY
-}
-
-/// Expands on the enum definitions from ARMBaseInstrInfo.h.
-/// They are being used by the disassembler implementation.
-namespace ARMII {
- enum {
- NEONRegMask = 15,
- GPRRegMask = 15,
- NEON_RegRdShift = 12,
- NEON_D_BitShift = 22,
- NEON_RegRnShift = 16,
- NEON_N_BitShift = 7,
- NEON_RegRmShift = 0,
- NEON_M_BitShift = 5
- };
-}
-
-/// Utility function for extracting [From, To] bits from a uint32_t.
-static inline unsigned slice(uint32_t Bits, unsigned From, unsigned To) {
- assert(From < 32 && To < 32 && From >= To);
- return (Bits >> To) & ((1 << (From - To + 1)) - 1);
-}
-
-/// Utility function for setting [From, To] bits to Val for a uint32_t.
-static inline void setSlice(unsigned &Bits, unsigned From, unsigned To,
- unsigned Val) {
- assert(From < 32 && To < 32 && From >= To);
- uint32_t Mask = ((1 << (From - To + 1)) - 1);
- Bits &= ~(Mask << To);
- Bits |= (Val & Mask) << To;
-}
-
-// Return an integer result equal to the number of bits of x that are ones.
-static inline uint32_t
-BitCount (uint64_t x)
-{
- // c accumulates the total bits set in x
- uint32_t c;
- for (c = 0; x; ++c)
- {
- x &= x - 1; // clear the least significant bit set
- }
- return c;
-}
-
-static inline bool
-BitIsSet (const uint64_t value, const uint64_t bit)
-{
- return (value & (1ull << bit)) != 0;
-}
-
-static inline bool
-BitIsClear (const uint64_t value, const uint64_t bit)
-{
- return (value & (1ull << bit)) == 0;
-}
-
-/// Various utilities for checking the target specific flags.
-
-/// A unary data processing instruction doesn't have an Rn operand.
-static inline bool isUnaryDP(uint64_t TSFlags) {
- return (TSFlags & ARMII::UnaryDP);
-}
-
-/// A NEON Domain instruction has cond field (Inst{31-28}) as 0b1111.
-static inline bool isNEONDomain(uint64_t TSFlags) {
- return (TSFlags & ARMII::DomainNEON) ||
- (TSFlags & ARMII::DomainNEONA8);
-}
-
-/// This four-bit field describes the addressing mode used.
-/// See also ARMBaseInstrInfo.h.
-static inline unsigned getAddrMode(uint64_t TSFlags) {
- return (TSFlags & ARMII::AddrModeMask);
-}
-
-/// {IndexModePre, IndexModePost}
-/// Only valid for load and store ops.
-/// See also ARMBaseInstrInfo.h.
-static inline unsigned getIndexMode(uint64_t TSFlags) {
- return (TSFlags & ARMII::IndexModeMask) >> ARMII::IndexModeShift;
-}
-
-/// Pre-/post-indexed operations define an extra $base_wb in the OutOperandList.
-static inline bool isPrePostLdSt(uint64_t TSFlags) {
- return (TSFlags & ARMII::IndexModeMask) != 0;
-}
-
-// Forward declaration.
-class ARMBasicMCBuilder;
-
-// Builder Object is mostly ignored except in some Thumb disassemble functions.
-typedef ARMBasicMCBuilder *BO;
-
-/// DisassembleFP - DisassembleFP points to a function that disassembles an insn
-/// and builds the MCOperand list upon disassembly. It returns false on failure
-/// or true on success. The number of operands added is updated upon success.
-typedef bool (*DisassembleFP)(MCInst &MI, unsigned Opcode, uint32_t insn,
- unsigned short NumOps, unsigned &NumOpsAdded, BO Builder);
-
-/// CreateMCBuilder - Return an ARMBasicMCBuilder that can build up the MC
-/// infrastructure of an MCInst given the Opcode and Format of the instr.
-/// Return NULL if it fails to create/return a proper builder. API clients
-/// are responsible for freeing up of the allocated memory. Cacheing can be
-/// performed by the API clients to improve performance.
-extern ARMBasicMCBuilder *CreateMCBuilder(unsigned Opcode, ARMFormat Format);
-
-/// ARMBasicMCBuilder - ARMBasicMCBuilder represents an ARM MCInst builder that
-/// knows how to build up the MCOperand list.
-class ARMBasicMCBuilder {
- friend ARMBasicMCBuilder *CreateMCBuilder(unsigned Opcode, ARMFormat Format);
- unsigned Opcode;
- ARMFormat Format;
- unsigned short NumOps;
- DisassembleFP Disasm;
- Session *SP;
- int Err; // !=0 if the builder encounters some error condition during build.
-
-private:
- /// Opcode, Format, and NumOperands make up an ARM Basic MCBuilder.
- ARMBasicMCBuilder(unsigned opc, ARMFormat format, unsigned short num);
-
-public:
- ARMBasicMCBuilder(ARMBasicMCBuilder &B)
- : Opcode(B.Opcode), Format(B.Format), NumOps(B.NumOps), Disasm(B.Disasm),
- SP(B.SP), GetOpInfo(0), DisInfo(0), Ctx(0) {
- Err = 0;
- }
-
- virtual ~ARMBasicMCBuilder() {}
-
- void SetSession(Session *sp) {
- SP = sp;
- }
-
- void SetErr(int ErrCode) {
- Err = ErrCode;
- }
-
- /// DoPredicateOperands - DoPredicateOperands process the predicate operands
- /// of some Thumb instructions which come before the reglist operands. It
- /// returns true if the two predicate operands have been processed.
- bool DoPredicateOperands(MCInst& MI, unsigned Opcode,
- uint32_t insn, unsigned short NumOpsRemaning);
-
- /// TryPredicateAndSBitModifier - TryPredicateAndSBitModifier tries to process
- /// the possible Predicate and SBitModifier, to build the remaining MCOperand
- /// constituents.
- bool TryPredicateAndSBitModifier(MCInst& MI, unsigned Opcode,
- uint32_t insn, unsigned short NumOpsRemaning);
-
- /// InITBlock - InITBlock returns true if we are inside an IT block.
- bool InITBlock() {
- if (SP)
- return SP->ITCounter > 0;
-
- return false;
- }
-
- /// Build - Build delegates to BuildIt to perform the heavy liftling. After
- /// that, it invokes RunBuildAfterHook where some housekeepings can be done.
- virtual bool Build(MCInst &MI, uint32_t insn) {
- bool Status = BuildIt(MI, insn);
- return RunBuildAfterHook(Status, MI, insn);
- }
-
- /// BuildIt - BuildIt performs the build step for this ARM Basic MC Builder.
- /// The general idea is to set the Opcode for the MCInst, followed by adding
- /// the appropriate MCOperands to the MCInst. ARM Basic MC Builder delegates
- /// to the Format-specific disassemble function for disassembly, followed by
- /// TryPredicateAndSBitModifier() for PredicateOperand and OptionalDefOperand
- /// which follow the Dst/Src Operands.
- virtual bool BuildIt(MCInst &MI, uint32_t insn);
-
- /// RunBuildAfterHook - RunBuildAfterHook performs operations deemed necessary
- /// after BuildIt is finished.
- virtual bool RunBuildAfterHook(bool Status, MCInst &MI, uint32_t insn);
-
-private:
- /// Get condition of the current IT instruction.
- unsigned GetITCond() {
- assert(SP);
- return slice(SP->ITState, 7, 4);
- }
-
-private:
- //
- // Hooks for symbolic disassembly via the public 'C' interface.
- //
- // The function to get the symbolic information for operands.
- LLVMOpInfoCallback GetOpInfo;
- // The pointer to the block of symbolic information for above call back.
- void *DisInfo;
- // The assembly context for creating symbols and MCExprs in place of
- // immediate operands when there is symbolic information.
- MCContext *Ctx;
- // The address of the instruction being disassembled.
- uint64_t Address;
-
-public:
- void setupBuilderForSymbolicDisassembly(LLVMOpInfoCallback getOpInfo,
- void *disInfo, MCContext *ctx,
- uint64_t address) {
- GetOpInfo = getOpInfo;
- DisInfo = disInfo;
- Ctx = ctx;
- Address = address;
- }
-
- uint64_t getBuilderAddress() const { return Address; }
-
- /// tryAddingSymbolicOperand - tryAddingSymbolicOperand trys to add a symbolic
- /// operand in place of the immediate Value in the MCInst. The immediate
- /// Value has had any PC adjustment made by the caller. If the getOpInfo()
- /// function was set as part of the setupBuilderForSymbolicDisassembly() call
- /// then that function is called to get any symbolic information at the
- /// builder's Address for this instrution. If that returns non-zero then the
- /// symbolic information it returns is used to create an MCExpr and that is
- /// added as an operand to the MCInst. This function returns true if it adds
- /// an operand to the MCInst and false otherwise.
- bool tryAddingSymbolicOperand(uint64_t Value, uint64_t InstSize, MCInst &MI);
-
-};
-
-} // namespace llvm
-
-#endif
diff --git a/contrib/llvm/lib/Target/ARM/Disassembler/ThumbDisassemblerCore.h b/contrib/llvm/lib/Target/ARM/Disassembler/ThumbDisassemblerCore.h
deleted file mode 100644
index 834c6f6..0000000
--- a/contrib/llvm/lib/Target/ARM/Disassembler/ThumbDisassemblerCore.h
+++ /dev/null
@@ -1,2459 +0,0 @@
-//===- ThumbDisassemblerCore.h - Thumb disassembler helpers -----*- C++ -*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file is part of the ARM Disassembler.
-// It contains code for disassembling a Thumb instr. It is to be included by
-// ARMDisassemblerCore.cpp because it contains the static DisassembleThumbFrm()
-// function which acts as the dispatcher to disassemble a Thumb instruction.
-//
-//===----------------------------------------------------------------------===//
-
-///////////////////////////////
-// //
-// Utility Functions //
-// //
-///////////////////////////////
-
-// Utilities for 16-bit Thumb instructions.
-/*
-15 14 13 12 11 10 9 8 7 6 5 4 3 2 1 0
- [ tRt ]
- [ tRm ] [ tRn ] [ tRd ]
- D [ Rm ] [ Rd ]
-
- [ imm3]
- [ imm5 ]
- i [ imm5 ]
- [ imm7 ]
- [ imm8 ]
- [ imm11 ]
-
- [ cond ]
-*/
-
-// Extract tRt: Inst{10-8}.
-static inline unsigned getT1tRt(uint32_t insn) {
- return slice(insn, 10, 8);
-}
-
-// Extract tRm: Inst{8-6}.
-static inline unsigned getT1tRm(uint32_t insn) {
- return slice(insn, 8, 6);
-}
-
-// Extract tRn: Inst{5-3}.
-static inline unsigned getT1tRn(uint32_t insn) {
- return slice(insn, 5, 3);
-}
-
-// Extract tRd: Inst{2-0}.
-static inline unsigned getT1tRd(uint32_t insn) {
- return slice(insn, 2, 0);
-}
-
-// Extract [D:Rd]: Inst{7:2-0}.
-static inline unsigned getT1Rd(uint32_t insn) {
- return slice(insn, 7, 7) << 3 | slice(insn, 2, 0);
-}
-
-// Extract Rm: Inst{6-3}.
-static inline unsigned getT1Rm(uint32_t insn) {
- return slice(insn, 6, 3);
-}
-
-// Extract imm3: Inst{8-6}.
-static inline unsigned getT1Imm3(uint32_t insn) {
- return slice(insn, 8, 6);
-}
-
-// Extract imm5: Inst{10-6}.
-static inline unsigned getT1Imm5(uint32_t insn) {
- return slice(insn, 10, 6);
-}
-
-// Extract i:imm5: Inst{9:7-3}.
-static inline unsigned getT1Imm6(uint32_t insn) {
- return slice(insn, 9, 9) << 5 | slice(insn, 7, 3);
-}
-
-// Extract imm7: Inst{6-0}.
-static inline unsigned getT1Imm7(uint32_t insn) {
- return slice(insn, 6, 0);
-}
-
-// Extract imm8: Inst{7-0}.
-static inline unsigned getT1Imm8(uint32_t insn) {
- return slice(insn, 7, 0);
-}
-
-// Extract imm11: Inst{10-0}.
-static inline unsigned getT1Imm11(uint32_t insn) {
- return slice(insn, 10, 0);
-}
-
-// Extract cond: Inst{11-8}.
-static inline unsigned getT1Cond(uint32_t insn) {
- return slice(insn, 11, 8);
-}
-
-static inline bool IsGPR(unsigned RegClass) {
- return RegClass == ARM::GPRRegClassID || RegClass == ARM::rGPRRegClassID;
-}
-
-// Utilities for 32-bit Thumb instructions.
-
-static inline bool BadReg(uint32_t n) { return n == 13 || n == 15; }
-
-// Extract imm4: Inst{19-16}.
-static inline unsigned getImm4(uint32_t insn) {
- return slice(insn, 19, 16);
-}
-
-// Extract imm3: Inst{14-12}.
-static inline unsigned getImm3(uint32_t insn) {
- return slice(insn, 14, 12);
-}
-
-// Extract imm8: Inst{7-0}.
-static inline unsigned getImm8(uint32_t insn) {
- return slice(insn, 7, 0);
-}
-
-// A8.6.61 LDRB (immediate, Thumb) and friends
-// +/-: Inst{9}
-// imm8: Inst{7-0}
-static inline int decodeImm8(uint32_t insn) {
- int Offset = getImm8(insn);
- return slice(insn, 9, 9) ? Offset : -Offset;
-}
-
-// Extract imm12: Inst{11-0}.
-static inline unsigned getImm12(uint32_t insn) {
- return slice(insn, 11, 0);
-}
-
-// A8.6.63 LDRB (literal) and friends
-// +/-: Inst{23}
-// imm12: Inst{11-0}
-static inline int decodeImm12(uint32_t insn) {
- int Offset = getImm12(insn);
- return slice(insn, 23, 23) ? Offset : -Offset;
-}
-
-// Extract imm2: Inst{7-6}.
-static inline unsigned getImm2(uint32_t insn) {
- return slice(insn, 7, 6);
-}
-
-// For BFI, BFC, t2SBFX, and t2UBFX.
-// Extract lsb: Inst{14-12:7-6}.
-static inline unsigned getLsb(uint32_t insn) {
- return getImm3(insn) << 2 | getImm2(insn);
-}
-
-// For BFI and BFC.
-// Extract msb: Inst{4-0}.
-static inline unsigned getMsb(uint32_t insn) {
- return slice(insn, 4, 0);
-}
-
-// For t2SBFX and t2UBFX.
-// Extract widthminus1: Inst{4-0}.
-static inline unsigned getWidthMinus1(uint32_t insn) {
- return slice(insn, 4, 0);
-}
-
-// For t2ADDri12 and t2SUBri12.
-// imm12 = i:imm3:imm8;
-static inline unsigned getIImm3Imm8(uint32_t insn) {
- return slice(insn, 26, 26) << 11 | getImm3(insn) << 8 | getImm8(insn);
-}
-
-// For t2MOVi16 and t2MOVTi16.
-// imm16 = imm4:i:imm3:imm8;
-static inline unsigned getImm16(uint32_t insn) {
- return getImm4(insn) << 12 | slice(insn, 26, 26) << 11 |
- getImm3(insn) << 8 | getImm8(insn);
-}
-
-// Inst{5-4} encodes the shift type.
-static inline unsigned getShiftTypeBits(uint32_t insn) {
- return slice(insn, 5, 4);
-}
-
-// Inst{14-12}:Inst{7-6} encodes the imm5 shift amount.
-static inline unsigned getShiftAmtBits(uint32_t insn) {
- return getImm3(insn) << 2 | getImm2(insn);
-}
-
-// A8.6.17 BFC
-// Encoding T1 ARMv6T2, ARMv7
-// LLVM-specific encoding for #<lsb> and #<width>
-static inline bool getBitfieldInvMask(uint32_t insn, uint32_t &mask) {
- uint32_t lsb = getImm3(insn) << 2 | getImm2(insn);
- uint32_t msb = getMsb(insn);
- uint32_t Val = 0;
- if (msb < lsb) {
- DEBUG(errs() << "Encoding error: msb < lsb\n");
- return false;
- }
- for (uint32_t i = lsb; i <= msb; ++i)
- Val |= (1 << i);
- mask = ~Val;
- return true;
-}
-
-// A8.4 Shifts applied to a register
-// A8.4.1 Constant shifts
-// A8.4.3 Pseudocode details of instruction-specified shifts and rotates
-//
-// decodeImmShift() returns the shift amount and the the shift opcode.
-// Note that, as of Jan-06-2010, LLVM does not support rrx shifted operands yet.
-static inline unsigned decodeImmShift(unsigned bits2, unsigned imm5,
- ARM_AM::ShiftOpc &ShOp) {
-
- assert(imm5 < 32 && "Invalid imm5 argument");
- switch (bits2) {
- default: assert(0 && "No such value");
- case 0:
- ShOp = (imm5 == 0 ? ARM_AM::no_shift : ARM_AM::lsl);
- return imm5;
- case 1:
- ShOp = ARM_AM::lsr;
- return (imm5 == 0 ? 32 : imm5);
- case 2:
- ShOp = ARM_AM::asr;
- return (imm5 == 0 ? 32 : imm5);
- case 3:
- ShOp = (imm5 == 0 ? ARM_AM::rrx : ARM_AM::ror);
- return (imm5 == 0 ? 1 : imm5);
- }
-}
-
-// A6.3.2 Modified immediate constants in Thumb instructions
-//
-// ThumbExpandImm() returns the modified immediate constant given an imm12 for
-// Thumb data-processing instructions with modified immediate.
-// See also A6.3.1 Data-processing (modified immediate).
-static inline unsigned ThumbExpandImm(unsigned imm12) {
- assert(imm12 <= 0xFFF && "Invalid imm12 argument");
-
- // If the leading two bits is 0b00, the modified immediate constant is
- // obtained by splatting the low 8 bits into the first byte, every other byte,
- // or every byte of a 32-bit value.
- //
- // Otherwise, a rotate right of '1':imm12<6:0> by the amount imm12<11:7> is
- // performed.
-
- if (slice(imm12, 11, 10) == 0) {
- unsigned short control = slice(imm12, 9, 8);
- unsigned imm8 = slice(imm12, 7, 0);
- switch (control) {
- default:
- assert(0 && "No such value");
- return 0;
- case 0:
- return imm8;
- case 1:
- return imm8 << 16 | imm8;
- case 2:
- return imm8 << 24 | imm8 << 8;
- case 3:
- return imm8 << 24 | imm8 << 16 | imm8 << 8 | imm8;
- }
- } else {
- // A rotate is required.
- unsigned Val = 1 << 7 | slice(imm12, 6, 0);
- unsigned Amt = slice(imm12, 11, 7);
- return ARM_AM::rotr32(Val, Amt);
- }
-}
-
-static inline int decodeImm32_B_EncodingT3(uint32_t insn) {
- bool S = slice(insn, 26, 26);
- bool J1 = slice(insn, 13, 13);
- bool J2 = slice(insn, 11, 11);
- unsigned Imm21 = slice(insn, 21, 16) << 12 | slice(insn, 10, 0) << 1;
- if (S) Imm21 |= 1 << 20;
- if (J2) Imm21 |= 1 << 19;
- if (J1) Imm21 |= 1 << 18;
-
- return SignExtend32<21>(Imm21);
-}
-
-static inline int decodeImm32_B_EncodingT4(uint32_t insn) {
- unsigned S = slice(insn, 26, 26);
- bool I1 = slice(insn, 13, 13) == S;
- bool I2 = slice(insn, 11, 11) == S;
- unsigned Imm25 = slice(insn, 25, 16) << 12 | slice(insn, 10, 0) << 1;
- if (S) Imm25 |= 1 << 24;
- if (I1) Imm25 |= 1 << 23;
- if (I2) Imm25 |= 1 << 22;
-
- return SignExtend32<25>(Imm25);
-}
-
-static inline int decodeImm32_BL(uint32_t insn) {
- unsigned S = slice(insn, 26, 26);
- bool I1 = slice(insn, 13, 13) == S;
- bool I2 = slice(insn, 11, 11) == S;
- unsigned Imm25 = slice(insn, 25, 16) << 12 | slice(insn, 10, 0) << 1;
- if (S) Imm25 |= 1 << 24;
- if (I1) Imm25 |= 1 << 23;
- if (I2) Imm25 |= 1 << 22;
-
- return SignExtend32<25>(Imm25);
-}
-
-static inline int decodeImm32_BLX(uint32_t insn) {
- unsigned S = slice(insn, 26, 26);
- bool I1 = slice(insn, 13, 13) == S;
- bool I2 = slice(insn, 11, 11) == S;
- unsigned Imm25 = slice(insn, 25, 16) << 12 | slice(insn, 10, 1) << 2;
- if (S) Imm25 |= 1 << 24;
- if (I1) Imm25 |= 1 << 23;
- if (I2) Imm25 |= 1 << 22;
-
- return SignExtend32<25>(Imm25);
-}
-
-// See, for example, A8.6.221 SXTAB16.
-static inline unsigned decodeRotate(uint32_t insn) {
- unsigned rotate = slice(insn, 5, 4);
- return rotate << 3;
-}
-
-///////////////////////////////////////////////
-// //
-// Thumb1 instruction disassembly functions. //
-// //
-///////////////////////////////////////////////
-
-// See "Utilities for 16-bit Thumb instructions" for register naming convention.
-
-// A6.2.1 Shift (immediate), add, subtract, move, and compare
-//
-// shift immediate: tRd CPSR tRn imm5
-// add/sub register: tRd CPSR tRn tRm
-// add/sub 3-bit immediate: tRd CPSR tRn imm3
-// add/sub 8-bit immediate: tRt CPSR tRt(TIED_TO) imm8
-// mov/cmp immediate: tRt [CPSR] imm8 (CPSR present for mov)
-//
-// Special case:
-// tMOVSr: tRd tRn
-static bool DisassembleThumb1General(MCInst &MI, unsigned Opcode, uint32_t insn,
- unsigned short NumOps, unsigned &NumOpsAdded, BO B) {
-
- const MCOperandInfo *OpInfo = ARMInsts[Opcode].OpInfo;
- unsigned &OpIdx = NumOpsAdded;
-
- OpIdx = 0;
-
- assert(NumOps >= 2 && OpInfo[0].RegClass == ARM::tGPRRegClassID
- && "Invalid arguments");
-
- bool Imm3 = (Opcode == ARM::tADDi3 || Opcode == ARM::tSUBi3);
-
- // Use Rt implies use imm8.
- bool UseRt = (Opcode == ARM::tADDi8 || Opcode == ARM::tSUBi8 ||
- Opcode == ARM::tMOVi8 || Opcode == ARM::tCMPi8);
-
- // Add the destination operand.
- MI.addOperand(MCOperand::CreateReg(
- getRegisterEnum(B, ARM::tGPRRegClassID,
- UseRt ? getT1tRt(insn) : getT1tRd(insn))));
- ++OpIdx;
-
- // Check whether the next operand to be added is a CCR Register.
- if (OpInfo[OpIdx].RegClass == ARM::CCRRegClassID) {
- assert(OpInfo[OpIdx].isOptionalDef() && "Optional def operand expected");
- MI.addOperand(MCOperand::CreateReg(B->InITBlock() ? 0 : ARM::CPSR));
- ++OpIdx;
- }
-
- // Check whether the next operand to be added is a Thumb1 Register.
- assert(OpIdx < NumOps && "More operands expected");
- if (OpInfo[OpIdx].RegClass == ARM::tGPRRegClassID) {
- // For UseRt, the reg operand is tied to the first reg operand.
- MI.addOperand(MCOperand::CreateReg(
- getRegisterEnum(B, ARM::tGPRRegClassID,
- UseRt ? getT1tRt(insn) : getT1tRn(insn))));
- ++OpIdx;
- }
-
- // Special case for tMOVSr.
- if (OpIdx == NumOps)
- return true;
-
- // The next available operand is either a reg operand or an imm operand.
- if (OpInfo[OpIdx].RegClass == ARM::tGPRRegClassID) {
- // Three register operand instructions.
- MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::tGPRRegClassID,
- getT1tRm(insn))));
- } else {
- assert(OpInfo[OpIdx].RegClass < 0 &&
- !OpInfo[OpIdx].isPredicate() && !OpInfo[OpIdx].isOptionalDef()
- && "Pure imm operand expected");
- unsigned Imm = 0;
- if (UseRt)
- Imm = getT1Imm8(insn);
- else if (Imm3)
- Imm = getT1Imm3(insn);
- else {
- Imm = getT1Imm5(insn);
- ARM_AM::ShiftOpc ShOp = getShiftOpcForBits(slice(insn, 12, 11));
- getImmShiftSE(ShOp, Imm);
- }
- MI.addOperand(MCOperand::CreateImm(Imm));
- }
- ++OpIdx;
-
- return true;
-}
-
-// A6.2.2 Data-processing
-//
-// tCMPr, tTST, tCMN: tRd tRn
-// tMVN, tRSB: tRd CPSR tRn
-// Others: tRd CPSR tRd(TIED_TO) tRn
-static bool DisassembleThumb1DP(MCInst &MI, unsigned Opcode, uint32_t insn,
- unsigned short NumOps, unsigned &NumOpsAdded, BO B) {
-
- const MCInstrDesc &MCID = ARMInsts[Opcode];
- const MCOperandInfo *OpInfo = MCID.OpInfo;
- unsigned &OpIdx = NumOpsAdded;
-
- OpIdx = 0;
-
- assert(NumOps >= 2 && OpInfo[0].RegClass == ARM::tGPRRegClassID &&
- (OpInfo[1].RegClass == ARM::CCRRegClassID
- || OpInfo[1].RegClass == ARM::tGPRRegClassID)
- && "Invalid arguments");
-
- // Add the destination operand.
- MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::tGPRRegClassID,
- getT1tRd(insn))));
- ++OpIdx;
-
- // Check whether the next operand to be added is a CCR Register.
- if (OpInfo[OpIdx].RegClass == ARM::CCRRegClassID) {
- assert(OpInfo[OpIdx].isOptionalDef() && "Optional def operand expected");
- MI.addOperand(MCOperand::CreateReg(B->InITBlock() ? 0 : ARM::CPSR));
- ++OpIdx;
- }
-
- // We have either { tRd(TIED_TO), tRn } or { tRn } remaining.
- // Process the TIED_TO operand first.
-
- assert(OpIdx < NumOps && OpInfo[OpIdx].RegClass == ARM::tGPRRegClassID
- && "Thumb reg operand expected");
- int Idx;
- if ((Idx = MCID.getOperandConstraint(OpIdx, MCOI::TIED_TO)) != -1) {
- // The reg operand is tied to the first reg operand.
- MI.addOperand(MI.getOperand(Idx));
- ++OpIdx;
- }
-
- // Process possible next reg operand.
- if (OpIdx < NumOps && OpInfo[OpIdx].RegClass == ARM::tGPRRegClassID) {
- // Add tRn operand.
- MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::tGPRRegClassID,
- getT1tRn(insn))));
- ++OpIdx;
- }
-
- return true;
-}
-
-// A6.2.3 Special data instructions and branch and exchange
-//
-// tADDhirr: Rd Rd(TIED_TO) Rm
-// tCMPhir: Rd Rm
-// tMOVr, tMOVgpr2gpr, tMOVgpr2tgpr, tMOVtgpr2gpr: Rd|tRd Rm|tRn
-// tBX: Rm
-// tBX_RET: 0 operand
-// tBX_RET_vararg: Rm
-// tBLXr_r9: Rm
-// tBRIND: Rm
-static bool DisassembleThumb1Special(MCInst &MI, unsigned Opcode, uint32_t insn,
- unsigned short NumOps, unsigned &NumOpsAdded, BO B) {
-
- // tBX_RET has 0 operand.
- if (NumOps == 0)
- return true;
-
- // BX/BLX/tBRIND (indirect branch, i.e, mov pc, Rm) has 1 reg operand: Rm.
- if (Opcode==ARM::tBLXr_r9 || Opcode==ARM::tBX || Opcode==ARM::tBRIND) {
- if (Opcode == ARM::tBLXr_r9) {
- // Handling the two predicate operands before the reg operand.
- if (!B->DoPredicateOperands(MI, Opcode, insn, NumOps))
- return false;
- NumOpsAdded += 2;
- }
-
- MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID,
- getT1Rm(insn))));
- NumOpsAdded += 1;
-
- if (Opcode == ARM::tBX) {
- // Handling the two predicate operands after the reg operand.
- if (!B->DoPredicateOperands(MI, Opcode, insn, NumOps))
- return false;
- NumOpsAdded += 2;
- }
-
- return true;
- }
-
- const MCInstrDesc &MCID = ARMInsts[Opcode];
- const MCOperandInfo *OpInfo = MCID.OpInfo;
- unsigned &OpIdx = NumOpsAdded;
-
- OpIdx = 0;
-
- // Add the destination operand.
- unsigned RegClass = OpInfo[OpIdx].RegClass;
- MI.addOperand(MCOperand::CreateReg(
- getRegisterEnum(B, RegClass,
- IsGPR(RegClass) ? getT1Rd(insn)
- : getT1tRd(insn))));
- ++OpIdx;
-
- // We have either { Rd(TIED_TO), Rm } or { Rm|tRn } remaining.
- // Process the TIED_TO operand first.
-
- assert(OpIdx < NumOps && "More operands expected");
- int Idx;
- if ((Idx = MCID.getOperandConstraint(OpIdx, MCOI::TIED_TO)) != -1) {
- // The reg operand is tied to the first reg operand.
- MI.addOperand(MI.getOperand(Idx));
- ++OpIdx;
- }
-
- // The next reg operand is either Rm or tRn.
- assert(OpIdx < NumOps && "More operands expected");
- RegClass = OpInfo[OpIdx].RegClass;
- MI.addOperand(MCOperand::CreateReg(
- getRegisterEnum(B, RegClass,
- IsGPR(RegClass) ? getT1Rm(insn)
- : getT1tRn(insn))));
- ++OpIdx;
-
- return true;
-}
-
-// A8.6.59 LDR (literal)
-//
-// tLDRpci: tRt imm8*4
-static bool DisassembleThumb1LdPC(MCInst &MI, unsigned Opcode, uint32_t insn,
- unsigned short NumOps, unsigned &NumOpsAdded, BO B) {
-
- const MCOperandInfo *OpInfo = ARMInsts[Opcode].OpInfo;
- if (!OpInfo) return false;
-
- assert(NumOps >= 2 && OpInfo[0].RegClass == ARM::tGPRRegClassID &&
- (OpInfo[1].RegClass < 0 &&
- !OpInfo[1].isPredicate() &&
- !OpInfo[1].isOptionalDef())
- && "Invalid arguments");
-
- // Add the destination operand.
- MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::tGPRRegClassID,
- getT1tRt(insn))));
-
- // And the (imm8 << 2) operand.
- MI.addOperand(MCOperand::CreateImm(getT1Imm8(insn) << 2));
-
- NumOpsAdded = 2;
-
- return true;
-}
-
-// Thumb specific addressing modes (see ARMInstrThumb.td):
-//
-// t_addrmode_rr := reg + reg
-//
-// t_addrmode_s4 := reg + reg
-// reg + imm5 * 4
-//
-// t_addrmode_s2 := reg + reg
-// reg + imm5 * 2
-//
-// t_addrmode_s1 := reg + reg
-// reg + imm5
-//
-// t_addrmode_sp := sp + imm8 * 4
-//
-
-// A8.6.63 LDRB (literal)
-// A8.6.79 LDRSB (literal)
-// A8.6.75 LDRH (literal)
-// A8.6.83 LDRSH (literal)
-// A8.6.59 LDR (literal)
-//
-// These instrs calculate an address from the PC value and an immediate offset.
-// Rd Rn=PC (+/-)imm12 (+ if Inst{23} == 0b1)
-static bool DisassembleThumb2Ldpci(MCInst &MI, unsigned Opcode,
- uint32_t insn, unsigned short NumOps, unsigned &NumOpsAdded, BO B) {
-
- const MCOperandInfo *OpInfo = ARMInsts[Opcode].OpInfo;
- if (!OpInfo) return false;
-
- assert(NumOps >= 2 &&
- OpInfo[0].RegClass == ARM::GPRRegClassID &&
- OpInfo[1].RegClass < 0 &&
- "Expect >= 2 operands, first as reg, and second as imm operand");
-
- // Build the register operand, followed by the (+/-)imm12 immediate.
-
- MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID,
- decodeRd(insn))));
-
- MI.addOperand(MCOperand::CreateImm(decodeImm12(insn)));
-
- NumOpsAdded = 2;
-
- return true;
-}
-
-
-// A6.2.4 Load/store single data item
-//
-// Load/Store Register (reg|imm): tRd tRn imm5|tRm
-// Load Register Signed Byte|Halfword: tRd tRn tRm
-static bool DisassembleThumb1LdSt(unsigned opA, MCInst &MI, unsigned Opcode,
- uint32_t insn, unsigned short NumOps, unsigned &NumOpsAdded, BO B) {
-
- const MCInstrDesc &MCID = ARMInsts[Opcode];
- const MCOperandInfo *OpInfo = MCID.OpInfo;
- unsigned &OpIdx = NumOpsAdded;
-
- assert(NumOps >= 2
- && OpInfo[0].RegClass == ARM::tGPRRegClassID
- && OpInfo[1].RegClass == ARM::tGPRRegClassID
- && "Expect >= 2 operands and first two as thumb reg operands");
-
- // Add the destination reg and the base reg.
- MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::tGPRRegClassID,
- getT1tRd(insn))));
- MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::tGPRRegClassID,
- getT1tRn(insn))));
- OpIdx = 2;
-
- // We have either { imm5 } or { tRm } remaining.
- // Note that STR/LDR (register) should skip the imm5 offset operand for
- // t_addrmode_s[1|2|4].
-
- assert(OpIdx < NumOps && "More operands expected");
-
- if (OpInfo[OpIdx].RegClass < 0 && !OpInfo[OpIdx].isPredicate() &&
- !OpInfo[OpIdx].isOptionalDef()) {
- // Table A6-5 16-bit Thumb Load/store instructions
- // opA = 0b0101 for STR/LDR (register) and friends.
- // Otherwise, we have STR/LDR (immediate) and friends.
- assert(opA != 5 && "Immediate operand expected for this opcode");
- MI.addOperand(MCOperand::CreateImm(getT1Imm5(insn)));
- ++OpIdx;
- } else {
- // The next reg operand is tRm, the offset.
- assert(OpIdx < NumOps && OpInfo[OpIdx].RegClass == ARM::tGPRRegClassID
- && "Thumb reg operand expected");
- MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::tGPRRegClassID,
- getT1tRm(insn))));
- ++OpIdx;
- }
- return true;
-}
-
-// A6.2.4 Load/store single data item
-//
-// Load/Store Register SP relative: tRt ARM::SP imm8
-static bool DisassembleThumb1LdStSP(MCInst &MI, unsigned Opcode, uint32_t insn,
- unsigned short NumOps, unsigned &NumOpsAdded, BO B) {
-
- assert((Opcode == ARM::tLDRspi || Opcode == ARM::tSTRspi)
- && "Unexpected opcode");
-
- const MCOperandInfo *OpInfo = ARMInsts[Opcode].OpInfo;
- if (!OpInfo) return false;
-
- assert(NumOps >= 3 &&
- OpInfo[0].RegClass == ARM::tGPRRegClassID &&
- OpInfo[1].RegClass == ARM::GPRRegClassID &&
- (OpInfo[2].RegClass < 0 &&
- !OpInfo[2].isPredicate() &&
- !OpInfo[2].isOptionalDef())
- && "Invalid arguments");
-
- MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::tGPRRegClassID,
- getT1tRt(insn))));
- MI.addOperand(MCOperand::CreateReg(ARM::SP));
- MI.addOperand(MCOperand::CreateImm(getT1Imm8(insn)));
- NumOpsAdded = 3;
- return true;
-}
-
-// Table A6-1 16-bit Thumb instruction encoding
-// A8.6.10 ADR
-//
-// tADDrPCi: tRt imm8
-static bool DisassembleThumb1AddPCi(MCInst &MI, unsigned Opcode, uint32_t insn,
- unsigned short NumOps, unsigned &NumOpsAdded, BO B) {
-
- assert(Opcode == ARM::tADDrPCi && "Unexpected opcode");
-
- const MCOperandInfo *OpInfo = ARMInsts[Opcode].OpInfo;
- if (!OpInfo) return false;
-
- assert(NumOps >= 2 && OpInfo[0].RegClass == ARM::tGPRRegClassID &&
- (OpInfo[1].RegClass < 0 &&
- !OpInfo[1].isPredicate() &&
- !OpInfo[1].isOptionalDef())
- && "Invalid arguments");
-
- MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::tGPRRegClassID,
- getT1tRt(insn))));
- MI.addOperand(MCOperand::CreateImm(getT1Imm8(insn)));
- NumOpsAdded = 2;
- return true;
-}
-
-// Table A6-1 16-bit Thumb instruction encoding
-// A8.6.8 ADD (SP plus immediate)
-//
-// tADDrSPi: tRt ARM::SP imm8
-static bool DisassembleThumb1AddSPi(MCInst &MI, unsigned Opcode, uint32_t insn,
- unsigned short NumOps, unsigned &NumOpsAdded, BO B) {
-
- assert(Opcode == ARM::tADDrSPi && "Unexpected opcode");
-
- const MCOperandInfo *OpInfo = ARMInsts[Opcode].OpInfo;
- if (!OpInfo) return false;
-
- assert(NumOps >= 3 &&
- OpInfo[0].RegClass == ARM::tGPRRegClassID &&
- OpInfo[1].RegClass == ARM::GPRRegClassID &&
- (OpInfo[2].RegClass < 0 &&
- !OpInfo[2].isPredicate() &&
- !OpInfo[2].isOptionalDef())
- && "Invalid arguments");
-
- MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::tGPRRegClassID,
- getT1tRt(insn))));
- MI.addOperand(MCOperand::CreateReg(ARM::SP));
- MI.addOperand(MCOperand::CreateImm(getT1Imm8(insn)));
- NumOpsAdded = 3;
- return true;
-}
-
-// tPUSH, tPOP: Pred-Imm Pred-CCR register_list
-//
-// where register_list = low registers + [lr] for PUSH or
-// low registers + [pc] for POP
-//
-// "low registers" is specified by Inst{7-0}
-// lr|pc is specified by Inst{8}
-static bool DisassembleThumb1PushPop(MCInst &MI, unsigned Opcode, uint32_t insn,
- unsigned short NumOps, unsigned &NumOpsAdded, BO B) {
-
- assert((Opcode == ARM::tPUSH || Opcode == ARM::tPOP) && "Unexpected opcode");
-
- unsigned &OpIdx = NumOpsAdded;
-
- // Handling the two predicate operands before the reglist.
- if (B->DoPredicateOperands(MI, Opcode, insn, NumOps))
- OpIdx += 2;
- else {
- DEBUG(errs() << "Expected predicate operands not found.\n");
- return false;
- }
-
- unsigned RegListBits = slice(insn, 8, 8) << (Opcode == ARM::tPUSH ? 14 : 15)
- | slice(insn, 7, 0);
-
- // Fill the variadic part of reglist.
- for (unsigned i = 0; i < 16; ++i) {
- if ((RegListBits >> i) & 1) {
- MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID,
- i)));
- ++OpIdx;
- }
- }
-
- return true;
-}
-
-// A6.2.5 Miscellaneous 16-bit instructions
-// Delegate to DisassembleThumb1PushPop() for tPUSH & tPOP.
-//
-// tADDspi, tSUBspi: ARM::SP ARM::SP(TIED_TO) imm7
-// t2IT: firstcond=Inst{7-4} mask=Inst{3-0}
-// tCBNZ, tCBZ: tRd imm6*2
-// tBKPT: imm8
-// tNOP, tSEV, tYIELD, tWFE, tWFI:
-// no operand (except predicate pair)
-// tSETENDBE, tSETENDLE, :
-// no operand
-// Others: tRd tRn
-static bool DisassembleThumb1Misc(MCInst &MI, unsigned Opcode, uint32_t insn,
- unsigned short NumOps, unsigned &NumOpsAdded, BO B) {
-
- if (NumOps == 0)
- return true;
-
- if (Opcode == ARM::tPUSH || Opcode == ARM::tPOP)
- return DisassembleThumb1PushPop(MI, Opcode, insn, NumOps, NumOpsAdded, B);
-
- const MCOperandInfo *OpInfo = ARMInsts[Opcode].OpInfo;
-
- // Predicate operands are handled elsewhere.
- if (NumOps == 2 &&
- OpInfo[0].isPredicate() && OpInfo[1].isPredicate() &&
- OpInfo[0].RegClass < 0 && OpInfo[1].RegClass == ARM::CCRRegClassID) {
- return true;
- }
-
- if (Opcode == ARM::tADDspi || Opcode == ARM::tSUBspi) {
- // Special case handling for tADDspi and tSUBspi.
- // A8.6.8 ADD (SP plus immediate) & A8.6.215 SUB (SP minus immediate)
- MI.addOperand(MCOperand::CreateReg(ARM::SP));
- MI.addOperand(MCOperand::CreateReg(ARM::SP));
- MI.addOperand(MCOperand::CreateImm(getT1Imm7(insn)));
- NumOpsAdded = 3;
- return true;
- }
-
- if (Opcode == ARM::t2IT) {
- // Special case handling for If-Then.
- // A8.6.50 IT
- // Tag the (firstcond[0] bit << 4) along with mask.
-
- // firstcond
- MI.addOperand(MCOperand::CreateImm(slice(insn, 7, 4)));
-
- // firstcond[0] and mask
- MI.addOperand(MCOperand::CreateImm(slice(insn, 4, 0)));
- NumOpsAdded = 2;
- return true;
- }
-
- if (Opcode == ARM::tBKPT) {
- MI.addOperand(MCOperand::CreateImm(getT1Imm8(insn))); // breakpoint value
- NumOpsAdded = 1;
- return true;
- }
-
- // CPS has a singleton $opt operand that contains the following information:
- // The first op would be 0b10 as enable and 0b11 as disable in regular ARM,
- // but in Thumb it's is 0 as enable and 1 as disable. So map it to ARM's
- // default one. The second get the AIF flags from Inst{2-0}.
- if (Opcode == ARM::tCPS) {
- MI.addOperand(MCOperand::CreateImm(2 + slice(insn, 4, 4)));
- MI.addOperand(MCOperand::CreateImm(slice(insn, 2, 0)));
- NumOpsAdded = 2;
- return true;
- }
-
- assert(NumOps >= 2 && OpInfo[0].RegClass == ARM::tGPRRegClassID &&
- (OpInfo[1].RegClass < 0 || OpInfo[1].RegClass==ARM::tGPRRegClassID)
- && "Expect >=2 operands");
-
- // Add the destination operand.
- MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::tGPRRegClassID,
- getT1tRd(insn))));
-
- if (OpInfo[1].RegClass == ARM::tGPRRegClassID) {
- // Two register instructions.
- MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::tGPRRegClassID,
- getT1tRn(insn))));
- } else {
- // CBNZ, CBZ
- assert((Opcode == ARM::tCBNZ || Opcode == ARM::tCBZ) &&"Unexpected opcode");
- MI.addOperand(MCOperand::CreateImm(getT1Imm6(insn) * 2));
- }
-
- NumOpsAdded = 2;
-
- return true;
-}
-
-// A8.6.53 LDM / LDMIA
-// A8.6.189 STM / STMIA
-//
-// tLDMIA_UPD/tSTMIA_UPD: tRt tRt AM4ModeImm Pred-Imm Pred-CCR register_list
-// tLDMIA: tRt AM4ModeImm Pred-Imm Pred-CCR register_list
-static bool DisassembleThumb1LdStMul(bool Ld, MCInst &MI, unsigned Opcode,
- uint32_t insn, unsigned short NumOps,
- unsigned &NumOpsAdded, BO B) {
- assert((Opcode == ARM::tLDMIA || Opcode == ARM::tLDMIA_UPD ||
- Opcode == ARM::tSTMIA_UPD) && "Unexpected opcode");
-
- unsigned tRt = getT1tRt(insn);
- NumOpsAdded = 0;
-
- // WB register, if necessary.
- if (Opcode == ARM::tLDMIA_UPD || Opcode == ARM::tSTMIA_UPD) {
- MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID,
- tRt)));
- ++NumOpsAdded;
- }
-
- MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID,
- tRt)));
- ++NumOpsAdded;
-
- // Handling the two predicate operands before the reglist.
- if (B->DoPredicateOperands(MI, Opcode, insn, NumOps)) {
- NumOpsAdded += 2;
- } else {
- DEBUG(errs() << "Expected predicate operands not found.\n");
- return false;
- }
-
- unsigned RegListBits = slice(insn, 7, 0);
- if (BitCount(RegListBits) < 1) {
- DEBUG(errs() << "if BitCount(registers) < 1 then UNPREDICTABLE\n");
- return false;
- }
-
- // Fill the variadic part of reglist.
- for (unsigned i = 0; i < 8; ++i)
- if ((RegListBits >> i) & 1) {
- MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::tGPRRegClassID,
- i)));
- ++NumOpsAdded;
- }
-
- return true;
-}
-
-static bool DisassembleThumb1LdMul(MCInst &MI, unsigned Opcode, uint32_t insn,
- unsigned short NumOps, unsigned &NumOpsAdded, BO B) {
- return DisassembleThumb1LdStMul(true, MI, Opcode, insn, NumOps, NumOpsAdded,
- B);
-}
-
-static bool DisassembleThumb1StMul(MCInst &MI, unsigned Opcode, uint32_t insn,
- unsigned short NumOps, unsigned &NumOpsAdded, BO B) {
- return DisassembleThumb1LdStMul(false, MI, Opcode, insn, NumOps, NumOpsAdded,
- B);
-}
-
-// A8.6.16 B Encoding T1
-// cond = Inst{11-8} & imm8 = Inst{7-0}
-// imm32 = SignExtend(imm8:'0', 32)
-//
-// tBcc: offset Pred-Imm Pred-CCR
-// tSVC: imm8 Pred-Imm Pred-CCR
-// tTRAP: 0 operand (early return)
-static bool DisassembleThumb1CondBr(MCInst &MI, unsigned Opcode, uint32_t insn,
- unsigned short NumOps, unsigned &NumOpsAdded, BO) {
-
- if (Opcode == ARM::tTRAP)
- return true;
-
- const MCOperandInfo *OpInfo = ARMInsts[Opcode].OpInfo;
- if (!OpInfo) return false;
-
- assert(NumOps == 3 && OpInfo[0].RegClass < 0 &&
- OpInfo[1].isPredicate() && OpInfo[2].RegClass == ARM::CCRRegClassID
- && "Exactly 3 operands expected");
-
- unsigned Imm8 = getT1Imm8(insn);
- MI.addOperand(MCOperand::CreateImm(
- Opcode == ARM::tBcc ? SignExtend32<9>(Imm8 << 1)
- : (int)Imm8));
-
- // Predicate operands by ARMBasicMCBuilder::TryPredicateAndSBitModifier().
- // But note that for tBcc, if cond = '1110' then UNDEFINED.
- if (Opcode == ARM::tBcc && slice(insn, 11, 8) == 14) {
- DEBUG(errs() << "if cond = '1110' then UNDEFINED\n");
- return false;
- }
- NumOpsAdded = 1;
-
- return true;
-}
-
-// A8.6.16 B Encoding T2
-// imm11 = Inst{10-0}
-// imm32 = SignExtend(imm11:'0', 32)
-//
-// tB: offset
-static bool DisassembleThumb1Br(MCInst &MI, unsigned Opcode, uint32_t insn,
- unsigned short NumOps, unsigned &NumOpsAdded, BO) {
-
- const MCOperandInfo *OpInfo = ARMInsts[Opcode].OpInfo;
- if (!OpInfo) return false;
-
- assert(NumOps == 1 && OpInfo[0].RegClass < 0 && "1 imm operand expected");
-
- unsigned Imm11 = getT1Imm11(insn);
-
- MI.addOperand(MCOperand::CreateImm(SignExtend32<12>(Imm11 << 1)));
-
- NumOpsAdded = 1;
-
- return true;
-
-}
-
-// See A6.2 16-bit Thumb instruction encoding for instruction classes
-// corresponding to op.
-//
-// Table A6-1 16-bit Thumb instruction encoding (abridged)
-// op Instruction or instruction class
-// ------ --------------------------------------------------------------------
-// 00xxxx Shift (immediate), add, subtract, move, and compare on page A6-7
-// 010000 Data-processing on page A6-8
-// 010001 Special data instructions and branch and exchange on page A6-9
-// 01001x Load from Literal Pool, see LDR (literal) on page A8-122
-// 0101xx Load/store single data item on page A6-10
-// 011xxx
-// 100xxx
-// 10100x Generate PC-relative address, see ADR on page A8-32
-// 10101x Generate SP-relative address, see ADD (SP plus immediate) on
-// page A8-28
-// 1011xx Miscellaneous 16-bit instructions on page A6-11
-// 11000x Store multiple registers, see STM / STMIA / STMEA on page A8-374
-// 11001x Load multiple registers, see LDM / LDMIA / LDMFD on page A8-110 a
-// 1101xx Conditional branch, and Supervisor Call on page A6-13
-// 11100x Unconditional Branch, see B on page A8-44
-//
-static bool DisassembleThumb1(uint16_t op, MCInst &MI, unsigned Opcode,
- uint32_t insn, unsigned short NumOps, unsigned &NumOpsAdded, BO B) {
-
- unsigned op1 = slice(op, 5, 4);
- unsigned op2 = slice(op, 3, 2);
- unsigned op3 = slice(op, 1, 0);
- unsigned opA = slice(op, 5, 2);
- switch (op1) {
- case 0:
- // A6.2.1 Shift (immediate), add, subtract, move, and compare
- return DisassembleThumb1General(MI, Opcode, insn, NumOps, NumOpsAdded, B);
- case 1:
- switch (op2) {
- case 0:
- switch (op3) {
- case 0:
- // A6.2.2 Data-processing
- return DisassembleThumb1DP(MI, Opcode, insn, NumOps, NumOpsAdded, B);
- case 1:
- // A6.2.3 Special data instructions and branch and exchange
- return DisassembleThumb1Special(MI, Opcode, insn, NumOps, NumOpsAdded,
- B);
- default:
- // A8.6.59 LDR (literal)
- return DisassembleThumb1LdPC(MI, Opcode, insn, NumOps, NumOpsAdded, B);
- }
- break;
- default:
- // A6.2.4 Load/store single data item
- return DisassembleThumb1LdSt(opA, MI, Opcode, insn, NumOps, NumOpsAdded,
- B);
- break;
- }
- break;
- case 2:
- switch (op2) {
- case 0:
- // A6.2.4 Load/store single data item
- return DisassembleThumb1LdSt(opA, MI, Opcode, insn, NumOps, NumOpsAdded,
- B);
- case 1:
- // A6.2.4 Load/store single data item
- return DisassembleThumb1LdStSP(MI, Opcode, insn, NumOps, NumOpsAdded, B);
- case 2:
- if (op3 <= 1) {
- // A8.6.10 ADR
- return DisassembleThumb1AddPCi(MI, Opcode, insn, NumOps, NumOpsAdded,
- B);
- } else {
- // A8.6.8 ADD (SP plus immediate)
- return DisassembleThumb1AddSPi(MI, Opcode, insn, NumOps, NumOpsAdded,
- B);
- }
- default:
- // A6.2.5 Miscellaneous 16-bit instructions
- return DisassembleThumb1Misc(MI, Opcode, insn, NumOps, NumOpsAdded, B);
- }
- break;
- case 3:
- switch (op2) {
- case 0:
- if (op3 <= 1) {
- // A8.6.189 STM / STMIA / STMEA
- return DisassembleThumb1StMul(MI, Opcode, insn, NumOps, NumOpsAdded, B);
- } else {
- // A8.6.53 LDM / LDMIA / LDMFD
- return DisassembleThumb1LdMul(MI, Opcode, insn, NumOps, NumOpsAdded, B);
- }
- case 1:
- // A6.2.6 Conditional branch, and Supervisor Call
- return DisassembleThumb1CondBr(MI, Opcode, insn, NumOps, NumOpsAdded, B);
- case 2:
- // Unconditional Branch, see B on page A8-44
- return DisassembleThumb1Br(MI, Opcode, insn, NumOps, NumOpsAdded, B);
- default:
- assert(0 && "Unreachable code");
- break;
- }
- break;
- default:
- assert(0 && "Unreachable code");
- break;
- }
-
- return false;
-}
-
-///////////////////////////////////////////////
-// //
-// Thumb2 instruction disassembly functions. //
-// //
-///////////////////////////////////////////////
-
-///////////////////////////////////////////////////////////
-// //
-// Note: the register naming follows the ARM convention! //
-// //
-///////////////////////////////////////////////////////////
-
-static inline bool Thumb2SRSOpcode(unsigned Opcode) {
- switch (Opcode) {
- default:
- return false;
- case ARM::t2SRSDBW: case ARM::t2SRSDB:
- case ARM::t2SRSIAW: case ARM::t2SRSIA:
- return true;
- }
-}
-
-static inline bool Thumb2RFEOpcode(unsigned Opcode) {
- switch (Opcode) {
- default:
- return false;
- case ARM::t2RFEDBW: case ARM::t2RFEDB:
- case ARM::t2RFEIAW: case ARM::t2RFEIA:
- return true;
- }
-}
-
-// t2SRS[IA|DB]W/t2SRS[IA|DB]: mode_imm = Inst{4-0}
-static bool DisassembleThumb2SRS(MCInst &MI, unsigned Opcode, uint32_t insn,
- unsigned short NumOps, unsigned &NumOpsAdded) {
- MI.addOperand(MCOperand::CreateImm(slice(insn, 4, 0)));
- NumOpsAdded = 1;
- return true;
-}
-
-// t2RFE[IA|DB]W/t2RFE[IA|DB]: Rn
-static bool DisassembleThumb2RFE(MCInst &MI, unsigned Opcode, uint32_t insn,
- unsigned short NumOps, unsigned &NumOpsAdded, BO B) {
- unsigned Rn = decodeRn(insn);
- if (Rn == 15) {
- DEBUG(errs() << "if n == 15 then UNPREDICTABLE\n");
- return false;
- }
- MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B,ARM::GPRRegClassID,Rn)));
- NumOpsAdded = 1;
- return true;
-}
-
-static bool DisassembleThumb2LdStMul(MCInst &MI, unsigned Opcode, uint32_t insn,
- unsigned short NumOps, unsigned &NumOpsAdded, BO B) {
-
- if (Thumb2SRSOpcode(Opcode))
- return DisassembleThumb2SRS(MI, Opcode, insn, NumOps, NumOpsAdded);
-
- if (Thumb2RFEOpcode(Opcode))
- return DisassembleThumb2RFE(MI, Opcode, insn, NumOps, NumOpsAdded, B);
-
- assert((Opcode == ARM::t2LDMIA || Opcode == ARM::t2LDMIA_UPD ||
- Opcode == ARM::t2LDMDB || Opcode == ARM::t2LDMDB_UPD ||
- Opcode == ARM::t2STMIA || Opcode == ARM::t2STMIA_UPD ||
- Opcode == ARM::t2STMDB || Opcode == ARM::t2STMDB_UPD)
- && "Unexpected opcode");
- assert(NumOps >= 4 && "Thumb2 LdStMul expects NumOps >= 4");
-
- NumOpsAdded = 0;
-
- unsigned Base = getRegisterEnum(B, ARM::GPRRegClassID, decodeRn(insn));
-
- // Writeback to base.
- if (Opcode == ARM::t2LDMIA_UPD || Opcode == ARM::t2LDMDB_UPD ||
- Opcode == ARM::t2STMIA_UPD || Opcode == ARM::t2STMDB_UPD) {
- MI.addOperand(MCOperand::CreateReg(Base));
- ++NumOpsAdded;
- }
-
- MI.addOperand(MCOperand::CreateReg(Base));
- ++NumOpsAdded;
-
- // Handling the two predicate operands before the reglist.
- if (B->DoPredicateOperands(MI, Opcode, insn, NumOps)) {
- NumOpsAdded += 2;
- } else {
- DEBUG(errs() << "Expected predicate operands not found.\n");
- return false;
- }
-
- unsigned RegListBits = insn & ((1 << 16) - 1);
-
- // Fill the variadic part of reglist.
- for (unsigned i = 0; i < 16; ++i)
- if ((RegListBits >> i) & 1) {
- MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID,
- i)));
- ++NumOpsAdded;
- }
-
- return true;
-}
-
-// t2LDREX: Rd Rn
-// t2LDREXD: Rd Rs Rn
-// t2LDREXB, t2LDREXH: Rd Rn
-// t2STREX: Rs Rd Rn
-// t2STREXD: Rm Rd Rs Rn
-// t2STREXB, t2STREXH: Rm Rd Rn
-static bool DisassembleThumb2LdStEx(MCInst &MI, unsigned Opcode, uint32_t insn,
- unsigned short NumOps, unsigned &NumOpsAdded, BO B) {
-
- const MCOperandInfo *OpInfo = ARMInsts[Opcode].OpInfo;
- if (!OpInfo) return false;
-
- unsigned &OpIdx = NumOpsAdded;
-
- OpIdx = 0;
-
- assert(NumOps >= 2
- && OpInfo[0].RegClass > 0
- && OpInfo[1].RegClass > 0
- && "Expect >=2 operands and first two as reg operands");
-
- bool isStore = (ARM::t2STREX <= Opcode && Opcode <= ARM::t2STREXH);
- bool isSW = (Opcode == ARM::t2LDREX || Opcode == ARM::t2STREX);
- bool isDW = (Opcode == ARM::t2LDREXD || Opcode == ARM::t2STREXD);
-
- unsigned Rt = decodeRd(insn);
- unsigned Rt2 = decodeRs(insn); // But note that this is Rd for t2STREX.
- unsigned Rd = decodeRm(insn);
- unsigned Rn = decodeRn(insn);
-
- // Some sanity checking first.
- if (isStore) {
- // if d == n || d == t then UNPREDICTABLE
- // if d == n || d == t || d == t2 then UNPREDICTABLE
- if (isDW) {
- if (Rd == Rn || Rd == Rt || Rd == Rt2) {
- DEBUG(errs() << "if d == n || d == t || d == t2 then UNPREDICTABLE\n");
- return false;
- }
- } else {
- if (isSW) {
- if (Rt2 == Rn || Rt2 == Rt) {
- DEBUG(errs() << "if d == n || d == t then UNPREDICTABLE\n");
- return false;
- }
- } else {
- if (Rd == Rn || Rd == Rt) {
- DEBUG(errs() << "if d == n || d == t then UNPREDICTABLE\n");
- return false;
- }
- }
- }
- } else {
- // Load
- // A8.6.71 LDREXD
- // if t == t2 then UNPREDICTABLE
- if (isDW && Rt == Rt2) {
- DEBUG(errs() << "if t == t2 then UNPREDICTABLE\n");
- return false;
- }
- }
-
- // Add the destination operand for store.
- if (isStore) {
- MI.addOperand(MCOperand::CreateReg(
- getRegisterEnum(B, OpInfo[OpIdx].RegClass,
- isSW ? Rt2 : Rd)));
- ++OpIdx;
- }
-
- // Source operand for store and destination operand for load.
- MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, OpInfo[OpIdx].RegClass,
- Rt)));
- ++OpIdx;
-
- // Thumb2 doubleword complication: with an extra source/destination operand.
- if (isDW) {
- MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B,OpInfo[OpIdx].RegClass,
- Rt2)));
- ++OpIdx;
- }
-
- // Finally add the pointer operand.
- MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, OpInfo[OpIdx].RegClass,
- Rn)));
- ++OpIdx;
-
- return true;
-}
-
-// t2LDRDi8: Rd Rs Rn imm8s4 (offset mode)
-// t2LDRDpci: Rd Rs imm8s4 (Not decoded, prefer the generic t2LDRDi8 version)
-// t2STRDi8: Rd Rs Rn imm8s4 (offset mode)
-//
-// Ditto for t2LDRD_PRE, t2LDRD_POST, t2STRD_PRE, t2STRD_POST, which are for
-// disassembly only and do not have a tied_to writeback base register operand.
-static bool DisassembleThumb2LdStDual(MCInst &MI, unsigned Opcode,
- uint32_t insn, unsigned short NumOps, unsigned &NumOpsAdded, BO B) {
-
- const MCOperandInfo *OpInfo = ARMInsts[Opcode].OpInfo;
- if (!OpInfo) return false;
-
- assert(NumOps >= 4
- && OpInfo[0].RegClass > 0
- && OpInfo[0].RegClass == OpInfo[1].RegClass
- && OpInfo[2].RegClass > 0
- && OpInfo[3].RegClass < 0
- && "Expect >= 4 operands and first 3 as reg operands");
-
- // Thumnb allows for specifying Rt and Rt2, unlike ARM (which has Rt2==Rt+1).
- unsigned Rt = decodeRd(insn);
- unsigned Rt2 = decodeRs(insn);
- unsigned Rn = decodeRn(insn);
-
- // Some sanity checking first.
-
- // A8.6.67 LDRD (literal) has its W bit as (0).
- if (Opcode == ARM::t2LDRDi8 || Opcode == ARM::t2LDRD_PRE || Opcode == ARM::t2LDRD_POST) {
- if (Rn == 15 && slice(insn, 21, 21) != 0)
- return false;
- } else {
- // For Dual Store, PC cannot be used as the base register.
- if (Rn == 15) {
- DEBUG(errs() << "if n == 15 then UNPREDICTABLE\n");
- return false;
- }
- }
- if (Rt == Rt2) {
- DEBUG(errs() << "if t == t2 then UNPREDICTABLE\n");
- return false;
- }
- if (Opcode != ARM::t2LDRDi8 && Opcode != ARM::t2STRDi8) {
- if (Rn == Rt || Rn == Rt2) {
- DEBUG(errs() << "if wback && (n == t || n == t2) then UNPREDICTABLE\n");
- return false;
- }
- }
-
- // Add the <Rt> <Rt2> operands.
- unsigned RegClassPair = OpInfo[0].RegClass;
- unsigned RegClassBase = OpInfo[2].RegClass;
-
- MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, RegClassPair,
- decodeRd(insn))));
- MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, RegClassPair,
- decodeRs(insn))));
- MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, RegClassBase,
- decodeRn(insn))));
-
- // Finally add (+/-)imm8*4, depending on the U bit.
- int Offset = getImm8(insn) * 4;
- if (getUBit(insn) == 0)
- Offset = -Offset;
- MI.addOperand(MCOperand::CreateImm(Offset));
- NumOpsAdded = 4;
-
- return true;
-}
-
-// t2TBB, t2TBH: Rn Rm Pred-Imm Pred-CCR
-static bool DisassembleThumb2TB(MCInst &MI, unsigned Opcode,
- uint32_t insn, unsigned short NumOps, unsigned &NumOpsAdded, BO B) {
-
- assert(NumOps >= 2 && "Expect >= 2 operands");
-
- // The generic version of TBB/TBH needs a base register.
- MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID,
- decodeRn(insn))));
- // Add the index register.
- MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID,
- decodeRm(insn))));
- NumOpsAdded = 2;
-
- return true;
-}
-
-static inline bool Thumb2ShiftOpcode(unsigned Opcode) {
- switch (Opcode) {
- default:
- return false;
- case ARM::t2MOVCClsl: case ARM::t2MOVCClsr:
- case ARM::t2MOVCCasr: case ARM::t2MOVCCror:
- case ARM::t2LSLri: case ARM::t2LSRri:
- case ARM::t2ASRri: case ARM::t2RORri:
- return true;
- }
-}
-
-// A6.3.11 Data-processing (shifted register)
-//
-// Two register operands (Rn=0b1111 no 1st operand reg): Rs Rm
-// Two register operands (Rs=0b1111 no dst operand reg): Rn Rm
-// Three register operands: Rs Rn Rm
-// Three register operands: (Rn=0b1111 Conditional Move) Rs Ro(TIED_TO) Rm
-//
-// Constant shifts t2_so_reg is a 2-operand unit corresponding to the Thumb2
-// register with shift forms: (Rm, ConstantShiftSpecifier).
-// Constant shift specifier: Imm = (ShOp | ShAmt<<3).
-//
-// There are special instructions, like t2MOVsra_flag and t2MOVsrl_flag, which
-// only require two register operands: Rd, Rm in ARM Reference Manual terms, and
-// nothing else, because the shift amount is already specified.
-// Similar case holds for t2MOVrx, t2ADDrr, ..., etc.
-static bool DisassembleThumb2DPSoReg(MCInst &MI, unsigned Opcode, uint32_t insn,
- unsigned short NumOps, unsigned &NumOpsAdded, BO B) {
-
- const MCInstrDesc &MCID = ARMInsts[Opcode];
- const MCOperandInfo *OpInfo = MCID.OpInfo;
- unsigned &OpIdx = NumOpsAdded;
-
- // Special case handling.
- if (Opcode == ARM::t2BR_JT) {
- assert(NumOps == 4
- && OpInfo[0].RegClass == ARM::GPRRegClassID
- && OpInfo[1].RegClass == ARM::GPRRegClassID
- && OpInfo[2].RegClass < 0
- && OpInfo[3].RegClass < 0
- && "Exactly 4 operands expect and first two as reg operands");
- // Only need to populate the src reg operand.
- MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID,
- decodeRm(insn))));
- MI.addOperand(MCOperand::CreateReg(0));
- MI.addOperand(MCOperand::CreateImm(0));
- MI.addOperand(MCOperand::CreateImm(0));
- NumOpsAdded = 4;
- return true;
- }
-
- OpIdx = 0;
-
- assert(NumOps >= 2
- && (OpInfo[0].RegClass == ARM::GPRRegClassID ||
- OpInfo[0].RegClass == ARM::rGPRRegClassID)
- && (OpInfo[1].RegClass == ARM::GPRRegClassID ||
- OpInfo[1].RegClass == ARM::rGPRRegClassID)
- && "Expect >= 2 operands and first two as reg operands");
-
- bool ThreeReg = (NumOps > 2 && (OpInfo[2].RegClass == ARM::GPRRegClassID ||
- OpInfo[2].RegClass == ARM::rGPRRegClassID));
- bool NoDstReg = (decodeRs(insn) == 0xF);
-
- // Build the register operands, followed by the constant shift specifier.
-
- MI.addOperand(MCOperand::CreateReg(
- getRegisterEnum(B, OpInfo[0].RegClass,
- NoDstReg ? decodeRn(insn) : decodeRs(insn))));
- ++OpIdx;
-
- if (ThreeReg) {
- int Idx;
- if ((Idx = MCID.getOperandConstraint(OpIdx, MCOI::TIED_TO)) != -1) {
- // Process tied_to operand constraint.
- MI.addOperand(MI.getOperand(Idx));
- ++OpIdx;
- } else if (!NoDstReg) {
- MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, OpInfo[1].RegClass,
- decodeRn(insn))));
- ++OpIdx;
- } else {
- DEBUG(errs() << "Thumb2 encoding error: d==15 for three-reg operands.\n");
- return false;
- }
- }
-
- MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, OpInfo[OpIdx].RegClass,
- decodeRm(insn))));
- ++OpIdx;
-
- if (NumOps == OpIdx)
- return true;
-
- if (OpInfo[OpIdx].RegClass < 0 && !OpInfo[OpIdx].isPredicate()
- && !OpInfo[OpIdx].isOptionalDef()) {
-
- if (Thumb2ShiftOpcode(Opcode)) {
- unsigned Imm = getShiftAmtBits(insn);
- ARM_AM::ShiftOpc ShOp = getShiftOpcForBits(slice(insn, 5, 4));
- getImmShiftSE(ShOp, Imm);
- MI.addOperand(MCOperand::CreateImm(Imm));
- } else {
- // Build the constant shift specifier operand.
- unsigned bits2 = getShiftTypeBits(insn);
- unsigned imm5 = getShiftAmtBits(insn);
- ARM_AM::ShiftOpc ShOp = ARM_AM::no_shift;
- unsigned ShAmt = decodeImmShift(bits2, imm5, ShOp);
- MI.addOperand(MCOperand::CreateImm(ARM_AM::getSORegOpc(ShOp, ShAmt)));
- }
- ++OpIdx;
- }
-
- return true;
-}
-
-// A6.3.1 Data-processing (modified immediate)
-//
-// Two register operands: Rs Rn ModImm
-// One register operands (Rs=0b1111 no explicit dest reg): Rn ModImm
-// One register operands (Rn=0b1111 no explicit src reg): Rs ModImm -
-// {t2MOVi, t2MVNi}
-//
-// ModImm = ThumbExpandImm(i:imm3:imm8)
-static bool DisassembleThumb2DPModImm(MCInst &MI, unsigned Opcode,
- uint32_t insn, unsigned short NumOps, unsigned &NumOpsAdded, BO B) {
-
- const MCInstrDesc &MCID = ARMInsts[Opcode];
- const MCOperandInfo *OpInfo = MCID.OpInfo;
- unsigned &OpIdx = NumOpsAdded;
-
- OpIdx = 0;
-
- unsigned RdRegClassID = OpInfo[0].RegClass;
- assert(NumOps >= 2 && (RdRegClassID == ARM::GPRRegClassID ||
- RdRegClassID == ARM::rGPRRegClassID)
- && "Expect >= 2 operands and first one as reg operand");
-
- unsigned RnRegClassID = OpInfo[1].RegClass;
- bool TwoReg = (RnRegClassID == ARM::GPRRegClassID
- || RnRegClassID == ARM::rGPRRegClassID);
- bool NoDstReg = (decodeRs(insn) == 0xF);
-
- // Build the register operands, followed by the modified immediate.
-
- MI.addOperand(MCOperand::CreateReg(
- getRegisterEnum(B, RdRegClassID,
- NoDstReg ? decodeRn(insn) : decodeRs(insn))));
- ++OpIdx;
-
- if (TwoReg) {
- if (NoDstReg) {
- DEBUG(errs()<<"Thumb2 encoding error: d==15 for DPModImm 2-reg instr.\n");
- return false;
- }
- int Idx;
- if ((Idx = MCID.getOperandConstraint(OpIdx, MCOI::TIED_TO)) != -1) {
- // The reg operand is tied to the first reg operand.
- MI.addOperand(MI.getOperand(Idx));
- } else {
- // Add second reg operand.
- MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, RnRegClassID,
- decodeRn(insn))));
- }
- ++OpIdx;
- }
-
- // The modified immediate operand should come next.
- assert(OpIdx < NumOps && OpInfo[OpIdx].RegClass < 0 &&
- !OpInfo[OpIdx].isPredicate() && !OpInfo[OpIdx].isOptionalDef()
- && "Pure imm operand expected");
-
- // i:imm3:imm8
- // A6.3.2 Modified immediate constants in Thumb instructions
- unsigned imm12 = getIImm3Imm8(insn);
- MI.addOperand(MCOperand::CreateImm(ThumbExpandImm(imm12)));
- ++OpIdx;
-
- return true;
-}
-
-static inline bool Thumb2SaturateOpcode(unsigned Opcode) {
- switch (Opcode) {
- case ARM::t2SSAT: case ARM::t2SSAT16:
- case ARM::t2USAT: case ARM::t2USAT16:
- return true;
- default:
- return false;
- }
-}
-
-/// DisassembleThumb2Sat - Disassemble Thumb2 saturate instructions:
-/// o t2SSAT, t2USAT: Rs sat_pos Rn shamt
-/// o t2SSAT16, t2USAT16: Rs sat_pos Rn
-static bool DisassembleThumb2Sat(MCInst &MI, unsigned Opcode, uint32_t insn,
- unsigned &NumOpsAdded, BO B) {
- const MCInstrDesc &MCID = ARMInsts[Opcode];
- NumOpsAdded = MCID.getNumOperands() - 2; // ignore predicate operands
-
- // Disassemble the register def.
- MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::rGPRRegClassID,
- decodeRs(insn))));
-
- unsigned Pos = slice(insn, 4, 0);
- if (Opcode == ARM::t2SSAT || Opcode == ARM::t2SSAT16)
- Pos += 1;
- MI.addOperand(MCOperand::CreateImm(Pos));
-
- MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::rGPRRegClassID,
- decodeRn(insn))));
-
- if (NumOpsAdded == 4) {
- ARM_AM::ShiftOpc Opc = (slice(insn, 21, 21) != 0 ?
- ARM_AM::asr : ARM_AM::lsl);
- // Inst{14-12:7-6} encodes the imm5 shift amount.
- unsigned ShAmt = slice(insn, 14, 12) << 2 | slice(insn, 7, 6);
- if (ShAmt == 0) {
- if (Opc == ARM_AM::asr)
- ShAmt = 32;
- else
- Opc = ARM_AM::no_shift;
- }
- MI.addOperand(MCOperand::CreateImm(ARM_AM::getSORegOpc(Opc, ShAmt)));
- }
- return true;
-}
-
-// A6.3.3 Data-processing (plain binary immediate)
-//
-// o t2ADDri12, t2SUBri12: Rs Rn imm12
-// o t2LEApcrel (ADR): Rs imm12
-// o t2BFC (BFC): Rs Ro(TIED_TO) bf_inv_mask_imm
-// o t2BFI (BFI): Rs Ro(TIED_TO) Rn bf_inv_mask_imm
-// o t2MOVi16: Rs imm16
-// o t2MOVTi16: Rs imm16
-// o t2SBFX (SBFX): Rs Rn lsb width
-// o t2UBFX (UBFX): Rs Rn lsb width
-// o t2BFI (BFI): Rs Rn lsb width
-static bool DisassembleThumb2DPBinImm(MCInst &MI, unsigned Opcode,
- uint32_t insn, unsigned short NumOps, unsigned &NumOpsAdded, BO B) {
-
- const MCInstrDesc &MCID = ARMInsts[Opcode];
- const MCOperandInfo *OpInfo = MCID.OpInfo;
- unsigned &OpIdx = NumOpsAdded;
-
- OpIdx = 0;
-
- unsigned RdRegClassID = OpInfo[0].RegClass;
- assert(NumOps >= 2 && (RdRegClassID == ARM::GPRRegClassID ||
- RdRegClassID == ARM::rGPRRegClassID)
- && "Expect >= 2 operands and first one as reg operand");
-
- unsigned RnRegClassID = OpInfo[1].RegClass;
- bool TwoReg = (RnRegClassID == ARM::GPRRegClassID
- || RnRegClassID == ARM::rGPRRegClassID);
-
- // Build the register operand(s), followed by the immediate(s).
-
- MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, RdRegClassID,
- decodeRs(insn))));
- ++OpIdx;
-
- if (TwoReg) {
- assert(NumOps >= 3 && "Expect >= 3 operands");
- int Idx;
- if ((Idx = MCID.getOperandConstraint(OpIdx, MCOI::TIED_TO)) != -1) {
- // Process tied_to operand constraint.
- MI.addOperand(MI.getOperand(Idx));
- } else {
- // Add src reg operand.
- MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, RnRegClassID,
- decodeRn(insn))));
- }
- ++OpIdx;
- }
-
- if (Opcode == ARM::t2BFI) {
- // Add val reg operand.
- MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, RnRegClassID,
- decodeRn(insn))));
- ++OpIdx;
- }
-
- assert(OpInfo[OpIdx].RegClass < 0 && !OpInfo[OpIdx].isPredicate()
- && !OpInfo[OpIdx].isOptionalDef()
- && "Pure imm operand expected");
-
- // Pre-increment OpIdx.
- ++OpIdx;
-
- if (Opcode == ARM::t2ADDri12 || Opcode == ARM::t2SUBri12
- || Opcode == ARM::t2LEApcrel)
- MI.addOperand(MCOperand::CreateImm(getIImm3Imm8(insn)));
- else if (Opcode == ARM::t2MOVi16 || Opcode == ARM::t2MOVTi16) {
- if (!B->tryAddingSymbolicOperand(getImm16(insn), 4, MI))
- MI.addOperand(MCOperand::CreateImm(getImm16(insn)));
- } else if (Opcode == ARM::t2BFC || Opcode == ARM::t2BFI) {
- uint32_t mask = 0;
- if (getBitfieldInvMask(insn, mask))
- MI.addOperand(MCOperand::CreateImm(mask));
- else
- return false;
- } else {
- // Handle the case of: lsb width
- assert((Opcode == ARM::t2SBFX || Opcode == ARM::t2UBFX)
- && "Unexpected opcode");
- MI.addOperand(MCOperand::CreateImm(getLsb(insn)));
- MI.addOperand(MCOperand::CreateImm(getWidthMinus1(insn) + 1));
-
- ++OpIdx;
- }
-
- return true;
-}
-
-// A6.3.4 Table A6-15 Miscellaneous control instructions
-// A8.6.41 DMB
-// A8.6.42 DSB
-// A8.6.49 ISB
-static inline bool t2MiscCtrlInstr(uint32_t insn) {
- if (slice(insn, 31, 20) == 0xf3b && slice(insn, 15, 14) == 2 &&
- slice(insn, 12, 12) == 0)
- return true;
-
- return false;
-}
-
-// A6.3.4 Branches and miscellaneous control
-//
-// A8.6.16 B
-// Branches: t2B, t2Bcc -> imm operand
-//
-// Branches: t2TPsoft -> no operand
-//
-// A8.6.23 BL, BLX (immediate)
-// Branches (defined in ARMInstrThumb.td): tBLr9, tBLXi_r9 -> imm operand
-//
-// A8.6.26
-// t2BXJ -> Rn
-//
-// Miscellaneous control:
-// -> no operand (except pred-imm pred-ccr for CLREX, memory barrier variants)
-//
-// Hint: t2NOP, t2YIELD, t2WFE, t2WFI, t2SEV
-// -> no operand (except pred-imm pred-ccr)
-//
-// t2DBG -> imm4 = Inst{3-0}
-//
-// t2MRS/t2MRSsys -> Rs
-// t2MSR/t2MSRsys -> Rn mask=Inst{11-8}
-// t2SMC -> imm4 = Inst{19-16}
-static bool DisassembleThumb2BrMiscCtrl(MCInst &MI, unsigned Opcode,
- uint32_t insn, unsigned short NumOps, unsigned &NumOpsAdded, BO B) {
-
- if (NumOps == 0)
- return true;
-
- if (Opcode == ARM::t2DMB || Opcode == ARM::t2DSB) {
- // Inst{3-0} encodes the memory barrier option for the variants.
- unsigned opt = slice(insn, 3, 0);
- switch (opt) {
- case ARM_MB::SY: case ARM_MB::ST:
- case ARM_MB::ISH: case ARM_MB::ISHST:
- case ARM_MB::NSH: case ARM_MB::NSHST:
- case ARM_MB::OSH: case ARM_MB::OSHST:
- MI.addOperand(MCOperand::CreateImm(opt));
- NumOpsAdded = 1;
- return true;
- default:
- return false;
- }
- }
-
- if (t2MiscCtrlInstr(insn))
- return true;
-
- switch (Opcode) {
- case ARM::t2CLREX:
- case ARM::t2NOP:
- case ARM::t2YIELD:
- case ARM::t2WFE:
- case ARM::t2WFI:
- case ARM::t2SEV:
- return true;
- default:
- break;
- }
-
- // FIXME: To enable correct asm parsing and disasm of CPS we need 3 different
- // opcodes which match the same real instruction. This is needed since there's
- // no current handling of optional arguments. Fix here when a better handling
- // of optional arguments is implemented.
- if (Opcode == ARM::t2CPS3p) {
- MI.addOperand(MCOperand::CreateImm(slice(insn, 10, 9))); // imod
- MI.addOperand(MCOperand::CreateImm(slice(insn, 7, 5))); // iflags
- MI.addOperand(MCOperand::CreateImm(slice(insn, 4, 0))); // mode
- NumOpsAdded = 3;
- return true;
- }
- if (Opcode == ARM::t2CPS2p) {
- MI.addOperand(MCOperand::CreateImm(slice(insn, 10, 9))); // imod
- MI.addOperand(MCOperand::CreateImm(slice(insn, 7, 5))); // iflags
- NumOpsAdded = 2;
- return true;
- }
- if (Opcode == ARM::t2CPS1p) {
- MI.addOperand(MCOperand::CreateImm(slice(insn, 4, 0))); // mode
- NumOpsAdded = 1;
- return true;
- }
-
- // DBG has its option specified in Inst{3-0}.
- if (Opcode == ARM::t2DBG) {
- MI.addOperand(MCOperand::CreateImm(slice(insn, 3, 0)));
- NumOpsAdded = 1;
- return true;
- }
-
- // MRS and MRSsys take one GPR reg Rs.
- if (Opcode == ARM::t2MRS || Opcode == ARM::t2MRSsys) {
- MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID,
- decodeRs(insn))));
- NumOpsAdded = 1;
- return true;
- }
- // BXJ takes one GPR reg Rn.
- if (Opcode == ARM::t2BXJ) {
- MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID,
- decodeRn(insn))));
- NumOpsAdded = 1;
- return true;
- }
- // MSR take a mask, followed by one GPR reg Rn. The mask contains the R Bit in
- // bit 4, and the special register fields in bits 3-0.
- if (Opcode == ARM::t2MSR) {
- MI.addOperand(MCOperand::CreateImm(slice(insn, 20, 20) << 4 /* R Bit */ |
- slice(insn, 11, 8) /* Special Reg */));
- MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID,
- decodeRn(insn))));
- NumOpsAdded = 2;
- return true;
- }
- // SMC take imm4.
- if (Opcode == ARM::t2SMC) {
- MI.addOperand(MCOperand::CreateImm(slice(insn, 19, 16)));
- NumOpsAdded = 1;
- return true;
- }
-
- // Some instructions have predicate operands first before the immediate.
- if (Opcode == ARM::tBLXi_r9 || Opcode == ARM::tBLr9) {
- // Handling the two predicate operands before the imm operand.
- if (B->DoPredicateOperands(MI, Opcode, insn, NumOps))
- NumOpsAdded += 2;
- else {
- DEBUG(errs() << "Expected predicate operands not found.\n");
- return false;
- }
- }
-
- // Add the imm operand.
- int Offset = 0;
-
- switch (Opcode) {
- default:
- assert(0 && "Unexpected opcode");
- return false;
- case ARM::t2B:
- Offset = decodeImm32_B_EncodingT4(insn);
- break;
- case ARM::t2Bcc:
- Offset = decodeImm32_B_EncodingT3(insn);
- break;
- case ARM::tBLr9:
- Offset = decodeImm32_BL(insn);
- break;
- case ARM::tBLXi_r9:
- Offset = decodeImm32_BLX(insn);
- break;
- }
-
- if (!B->tryAddingSymbolicOperand(Offset + B->getBuilderAddress() + 4, 4, MI))
- MI.addOperand(MCOperand::CreateImm(Offset));
-
- // This is an increment as some predicate operands may have been added first.
- NumOpsAdded += 1;
-
- return true;
-}
-
-static inline bool Thumb2PreloadOpcode(unsigned Opcode) {
- switch (Opcode) {
- default:
- return false;
- case ARM::t2PLDi12: case ARM::t2PLDi8:
- case ARM::t2PLDs:
- case ARM::t2PLDWi12: case ARM::t2PLDWi8:
- case ARM::t2PLDWs:
- case ARM::t2PLIi12: case ARM::t2PLIi8:
- case ARM::t2PLIs:
- return true;
- }
-}
-
-static bool DisassembleThumb2PreLoad(MCInst &MI, unsigned Opcode, uint32_t insn,
- unsigned short NumOps, unsigned &NumOpsAdded, BO B) {
-
- // Preload Data/Instruction requires either 2 or 3 operands.
- // t2PLDi12, t2PLDi8, t2PLDpci: Rn [+/-]imm12/imm8
- // t2PLDr: Rn Rm
- // t2PLDs: Rn Rm imm2=Inst{5-4}
- // Same pattern applies for t2PLDW* and t2PLI*.
-
- const MCInstrDesc &MCID = ARMInsts[Opcode];
- const MCOperandInfo *OpInfo = MCID.OpInfo;
- unsigned &OpIdx = NumOpsAdded;
-
- OpIdx = 0;
-
- assert(NumOps >= 2 &&
- OpInfo[0].RegClass == ARM::GPRRegClassID &&
- "Expect >= 2 operands and first one as reg operand");
-
- MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID,
- decodeRn(insn))));
- ++OpIdx;
-
- if (OpInfo[OpIdx].RegClass == ARM::rGPRRegClassID) {
- MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID,
- decodeRm(insn))));
- } else {
- assert(OpInfo[OpIdx].RegClass < 0 && !OpInfo[OpIdx].isPredicate()
- && !OpInfo[OpIdx].isOptionalDef()
- && "Pure imm operand expected");
- int Offset = 0;
- if (Opcode == ARM::t2PLDi8 || Opcode == ARM::t2PLDWi8 ||
- Opcode == ARM::t2PLIi8) {
- // A8.6.117 Encoding T2: add = FALSE
- unsigned Imm8 = getImm8(insn);
- Offset = -1 * Imm8;
- } else {
- // The i12 forms. See, for example, A8.6.117 Encoding T1.
- // Note that currently t2PLDi12 also handles the previously named t2PLDpci
- // opcode, that's why we use decodeImm12(insn) which returns +/- imm12.
- Offset = decodeImm12(insn);
- }
- MI.addOperand(MCOperand::CreateImm(Offset));
- }
- ++OpIdx;
-
- if (OpIdx < NumOps && OpInfo[OpIdx].RegClass < 0 &&
- !OpInfo[OpIdx].isPredicate() && !OpInfo[OpIdx].isOptionalDef()) {
- // Fills in the shift amount for t2PLDs, t2PLDWs, t2PLIs.
- MI.addOperand(MCOperand::CreateImm(slice(insn, 5, 4)));
- ++OpIdx;
- }
-
- return true;
-}
-
-static bool BadRegsThumb2LdSt(unsigned Opcode, uint32_t insn, bool Load,
- unsigned R0, unsigned R1, unsigned R2, bool UseRm, bool WB) {
-
- // Inst{22-21} encodes the data item transferred for load/store.
- // For single word, it is encoded as ob10.
- bool Word = (slice(insn, 22, 21) == 2);
- bool Half = (slice(insn, 22, 21) == 1);
- bool Byte = (slice(insn, 22, 21) == 0);
-
- if (UseRm && BadReg(R2)) {
- DEBUG(errs() << "if BadReg(m) then UNPREDICTABLE\n");
- return true;
- }
-
- if (Load) {
- if (!Word && R0 == 13) {
- DEBUG(errs() << "if t == 13 then UNPREDICTABLE\n");
- return true;
- }
- if (Byte) {
- if (WB && R0 == 15 && slice(insn, 10, 8) == 3) {
- // A8.6.78 LDRSB (immediate) Encoding T2 (errata markup 8.0)
- DEBUG(errs() << "if t == 15 && PUW == '011' then UNPREDICTABLE\n");
- return true;
- }
- }
- // A6.3.8 Load halfword, memory hints
- if (Half) {
- if (WB) {
- if (R0 == R1) {
- // A8.6.82 LDRSH (immediate) Encoding T2
- DEBUG(errs() << "if WB && n == t then UNPREDICTABLE\n");
- return true;
- }
- if (R0 == 15 && slice(insn, 10, 8) == 3) {
- // A8.6.82 LDRSH (immediate) Encoding T2 (errata markup 8.0)
- DEBUG(errs() << "if t == 15 && PUW == '011' then UNPREDICTABLE\n");
- return true;
- }
- } else {
- if (Opcode == ARM::t2LDRHi8 || Opcode == ARM::t2LDRSHi8) {
- if (R0 == 15 && slice(insn, 10, 8) == 4) {
- // A8.6.82 LDRSH (immediate) Encoding T2
- DEBUG(errs() << "if Rt == '1111' and PUW == '100' then SEE"
- << " \"Unallocated memory hints\"\n");
- return true;
- }
- } else {
- if (R0 == 15) {
- // A8.6.82 LDRSH (immediate) Encoding T1
- DEBUG(errs() << "if Rt == '1111' then SEE"
- << " \"Unallocated memory hints\"\n");
- return true;
- }
- }
- }
- }
- } else {
- if (WB && R0 == R1) {
- DEBUG(errs() << "if wback && n == t then UNPREDICTABLE\n");
- return true;
- }
- if ((WB && R0 == 15) || (!WB && R1 == 15)) {
- DEBUG(errs() << "if Rn == '1111' then UNDEFINED\n");
- return true;
- }
- if (Word) {
- if ((WB && R1 == 15) || (!WB && R0 == 15)) {
- DEBUG(errs() << "if t == 15 then UNPREDICTABLE\n");
- return true;
- }
- } else {
- if ((WB && BadReg(R1)) || (!WB && BadReg(R0))) {
- DEBUG(errs() << "if BadReg(t) then UNPREDICTABLE\n");
- return true;
- }
- }
- }
- return false;
-}
-
-// A6.3.10 Store single data item
-// A6.3.9 Load byte, memory hints
-// A6.3.8 Load halfword, memory hints
-// A6.3.7 Load word
-//
-// For example,
-//
-// t2LDRi12: Rd Rn (+)imm12
-// t2LDRi8: Rd Rn (+/-)imm8 (+ if Inst{9} == 0b1)
-// t2LDRs: Rd Rn Rm ConstantShiftSpecifier (see also
-// DisassembleThumb2DPSoReg)
-// t2LDR_POST: Rd Rn Rn(TIED_TO) (+/-)imm8 (+ if Inst{9} == 0b1)
-// t2LDR_PRE: Rd Rn Rn(TIED_TO) (+/-)imm8 (+ if Inst{9} == 0b1)
-//
-// t2STRi12: Rd Rn (+)imm12
-// t2STRi8: Rd Rn (+/-)imm8 (+ if Inst{9} == 0b1)
-// t2STRs: Rd Rn Rm ConstantShiftSpecifier (see also
-// DisassembleThumb2DPSoReg)
-// t2STR_POST: Rn Rd Rn(TIED_TO) (+/-)imm8 (+ if Inst{9} == 0b1)
-// t2STR_PRE: Rn Rd Rn(TIED_TO) (+/-)imm8 (+ if Inst{9} == 0b1)
-//
-// Note that for indexed modes, the Rn(TIED_TO) operand needs to be populated
-// correctly, as LLVM AsmPrinter depends on it. For indexed stores, the first
-// operand is Rn; for all the other instructions, Rd is the first operand.
-//
-// Delegates to DisassembleThumb2PreLoad() for preload data/instruction.
-// Delegates to DisassembleThumb2Ldpci() for load * literal operations.
-static bool DisassembleThumb2LdSt(bool Load, MCInst &MI, unsigned Opcode,
- uint32_t insn, unsigned short NumOps, unsigned &NumOpsAdded, BO B) {
-
- unsigned Rn = decodeRn(insn);
-
- if (Thumb2PreloadOpcode(Opcode))
- return DisassembleThumb2PreLoad(MI, Opcode, insn, NumOps, NumOpsAdded, B);
-
- // See, for example, A6.3.7 Load word: Table A6-18 Load word.
- if (Load && Rn == 15)
- return DisassembleThumb2Ldpci(MI, Opcode, insn, NumOps, NumOpsAdded, B);
- const MCInstrDesc &MCID = ARMInsts[Opcode];
- const MCOperandInfo *OpInfo = MCID.OpInfo;
- unsigned &OpIdx = NumOpsAdded;
-
- OpIdx = 0;
-
- assert(NumOps >= 3 &&
- OpInfo[0].RegClass > 0 &&
- OpInfo[1].RegClass > 0 &&
- "Expect >= 3 operands and first two as reg operands");
-
- bool ThreeReg = (OpInfo[2].RegClass > 0);
- bool TIED_TO = ThreeReg && MCID.getOperandConstraint(2, MCOI::TIED_TO) != -1;
- bool Imm12 = !ThreeReg && slice(insn, 23, 23) == 1; // ARMInstrThumb2.td
-
- // Build the register operands, followed by the immediate.
- unsigned R0 = 0, R1 = 0, R2 = 0;
- unsigned Rd = decodeRd(insn);
- int Imm = 0;
-
- if (!Load && TIED_TO) {
- R0 = Rn;
- R1 = Rd;
- } else {
- R0 = Rd;
- R1 = Rn;
- }
- if (ThreeReg) {
- if (TIED_TO) {
- R2 = Rn;
- Imm = decodeImm8(insn);
- } else {
- R2 = decodeRm(insn);
- // See, for example, A8.6.64 LDRB (register).
- // And ARMAsmPrinter::printT2AddrModeSoRegOperand().
- // LSL is the default shift opc, and LLVM does not expect it to be encoded
- // as part of the immediate operand.
- // Imm = ARM_AM::getSORegOpc(ARM_AM::lsl, slice(insn, 5, 4));
- Imm = slice(insn, 5, 4);
- }
- } else {
- if (Imm12)
- Imm = getImm12(insn);
- else
- Imm = decodeImm8(insn);
- }
-
- MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, OpInfo[OpIdx].RegClass,
- R0)));
- ++OpIdx;
- MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, OpInfo[OpIdx].RegClass,
- R1)));
- ++OpIdx;
-
- if (ThreeReg) {
- // This could be an offset register or a TIED_TO register.
- MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B,OpInfo[OpIdx].RegClass,
- R2)));
- ++OpIdx;
- }
-
- if (BadRegsThumb2LdSt(Opcode, insn, Load, R0, R1, R2, ThreeReg & !TIED_TO,
- TIED_TO))
- return false;
-
- assert(OpInfo[OpIdx].RegClass < 0 && !OpInfo[OpIdx].isPredicate()
- && !OpInfo[OpIdx].isOptionalDef()
- && "Pure imm operand expected");
-
- MI.addOperand(MCOperand::CreateImm(Imm));
- ++OpIdx;
-
- return true;
-}
-
-// A6.3.12 Data-processing (register)
-//
-// Two register operands [rotate]: Rs Rm [rotation(= (rotate:'000'))]
-// Three register operands only: Rs Rn Rm
-// Three register operands [rotate]: Rs Rn Rm [rotation(= (rotate:'000'))]
-//
-// Parallel addition and subtraction 32-bit Thumb instructions: Rs Rn Rm
-//
-// Miscellaneous operations: Rs [Rn] Rm
-static bool DisassembleThumb2DPReg(MCInst &MI, unsigned Opcode, uint32_t insn,
- unsigned short NumOps, unsigned &NumOpsAdded, BO B) {
-
- const MCInstrDesc &MCID = ARMInsts[Opcode];
- const MCOperandInfo *OpInfo = MCID.OpInfo;
- unsigned &OpIdx = NumOpsAdded;
-
- OpIdx = 0;
-
- assert(NumOps >= 2 &&
- OpInfo[0].RegClass > 0 &&
- OpInfo[1].RegClass > 0 &&
- "Expect >= 2 operands and first two as reg operands");
-
- // Build the register operands, followed by the optional rotation amount.
-
- bool ThreeReg = NumOps > 2 && OpInfo[2].RegClass > 0;
-
- MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, OpInfo[OpIdx].RegClass,
- decodeRs(insn))));
- ++OpIdx;
-
- if (ThreeReg) {
- MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B,OpInfo[OpIdx].RegClass,
- decodeRn(insn))));
- ++OpIdx;
- }
-
- MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, OpInfo[OpIdx].RegClass,
- decodeRm(insn))));
- ++OpIdx;
-
- if (OpIdx < NumOps && OpInfo[OpIdx].RegClass < 0
- && !OpInfo[OpIdx].isPredicate() && !OpInfo[OpIdx].isOptionalDef()) {
- // Add the rotation amount immediate.
- MI.addOperand(MCOperand::CreateImm(decodeRotate(insn)));
- ++OpIdx;
- }
-
- return true;
-}
-
-// A6.3.16 Multiply, multiply accumulate, and absolute difference
-//
-// t2MLA, t2MLS, t2SMMLA, t2SMMLS: Rs Rn Rm Ra=Inst{15-12}
-// t2MUL, t2SMMUL: Rs Rn Rm
-// t2SMLA[BB|BT|TB|TT|WB|WT]: Rs Rn Rm Ra=Inst{15-12}
-// t2SMUL[BB|BT|TB|TT|WB|WT]: Rs Rn Rm
-//
-// Dual halfword multiply: t2SMUAD[X], t2SMUSD[X], t2SMLAD[X], t2SMLSD[X]:
-// Rs Rn Rm Ra=Inst{15-12}
-//
-// Unsigned Sum of Absolute Differences [and Accumulate]
-// Rs Rn Rm [Ra=Inst{15-12}]
-static bool DisassembleThumb2Mul(MCInst &MI, unsigned Opcode, uint32_t insn,
- unsigned short NumOps, unsigned &NumOpsAdded, BO B) {
-
- const MCOperandInfo *OpInfo = ARMInsts[Opcode].OpInfo;
-
- assert(NumOps >= 3 &&
- OpInfo[0].RegClass == ARM::rGPRRegClassID &&
- OpInfo[1].RegClass == ARM::rGPRRegClassID &&
- OpInfo[2].RegClass == ARM::rGPRRegClassID &&
- "Expect >= 3 operands and first three as reg operands");
-
- // Build the register operands.
-
- bool FourReg = NumOps > 3 && OpInfo[3].RegClass == ARM::rGPRRegClassID;
-
- MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::rGPRRegClassID,
- decodeRs(insn))));
-
- MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::rGPRRegClassID,
- decodeRn(insn))));
-
- MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::rGPRRegClassID,
- decodeRm(insn))));
-
- if (FourReg)
- MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::rGPRRegClassID,
- decodeRd(insn))));
-
- NumOpsAdded = FourReg ? 4 : 3;
-
- return true;
-}
-
-// A6.3.17 Long multiply, long multiply accumulate, and divide
-//
-// t2SMULL, t2UMULL, t2SMLAL, t2UMLAL, t2UMAAL: RdLo RdHi Rn Rm
-// where RdLo = Inst{15-12} and RdHi = Inst{11-8}
-//
-// Halfword multiple accumulate long: t2SMLAL<x><y>: RdLo RdHi Rn Rm
-// where RdLo = Inst{15-12} and RdHi = Inst{11-8}
-//
-// Dual halfword multiple: t2SMLALD[X], t2SMLSLD[X]: RdLo RdHi Rn Rm
-// where RdLo = Inst{15-12} and RdHi = Inst{11-8}
-//
-// Signed/Unsigned divide: t2SDIV, t2UDIV: Rs Rn Rm
-static bool DisassembleThumb2LongMul(MCInst &MI, unsigned Opcode, uint32_t insn,
- unsigned short NumOps, unsigned &NumOpsAdded, BO B) {
-
- const MCOperandInfo *OpInfo = ARMInsts[Opcode].OpInfo;
-
- assert(NumOps >= 3 &&
- OpInfo[0].RegClass == ARM::rGPRRegClassID &&
- OpInfo[1].RegClass == ARM::rGPRRegClassID &&
- OpInfo[2].RegClass == ARM::rGPRRegClassID &&
- "Expect >= 3 operands and first three as reg operands");
-
- bool FourReg = NumOps > 3 && OpInfo[3].RegClass == ARM::rGPRRegClassID;
-
- // Build the register operands.
-
- if (FourReg)
- MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::rGPRRegClassID,
- decodeRd(insn))));
-
- MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::rGPRRegClassID,
- decodeRs(insn))));
-
- MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::rGPRRegClassID,
- decodeRn(insn))));
-
- MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::rGPRRegClassID,
- decodeRm(insn))));
-
- if (FourReg)
- NumOpsAdded = 4;
- else
- NumOpsAdded = 3;
-
- return true;
-}
-
-// See A6.3 32-bit Thumb instruction encoding for instruction classes
-// corresponding to (op1, op2, op).
-//
-// Table A6-9 32-bit Thumb instruction encoding
-// op1 op2 op Instruction class, see
-// --- ------- -- -----------------------------------------------------------
-// 01 00xx0xx - Load/store multiple on page A6-23
-// 00xx1xx - Load/store dual, load/store exclusive, table branch on
-// page A6-24
-// 01xxxxx - Data-processing (shifted register) on page A6-31
-// 1xxxxxx - Coprocessor instructions on page A6-40
-// 10 x0xxxxx 0 Data-processing (modified immediate) on page A6-15
-// x1xxxxx 0 Data-processing (plain binary immediate) on page A6-19
-// - 1 Branches and miscellaneous control on page A6-20
-// 11 000xxx0 - Store single data item on page A6-30
-// 001xxx0 - Advanced SIMD element or structure load/store instructions
-// on page A7-27
-// 00xx001 - Load byte, memory hints on page A6-28
-// 00xx011 - Load halfword, memory hints on page A6-26
-// 00xx101 - Load word on page A6-25
-// 00xx111 - UNDEFINED
-// 010xxxx - Data-processing (register) on page A6-33
-// 0110xxx - Multiply, multiply accumulate, and absolute difference on
-// page A6-38
-// 0111xxx - Long multiply, long multiply accumulate, and divide on
-// page A6-39
-// 1xxxxxx - Coprocessor instructions on page A6-40
-//
-static bool DisassembleThumb2(uint16_t op1, uint16_t op2, uint16_t op,
- MCInst &MI, unsigned Opcode, uint32_t insn, unsigned short NumOps,
- unsigned &NumOpsAdded, BO B) {
-
- switch (op1) {
- case 1:
- if (slice(op2, 6, 5) == 0) {
- if (slice(op2, 2, 2) == 0) {
- // Load/store multiple.
- return DisassembleThumb2LdStMul(MI, Opcode, insn, NumOps, NumOpsAdded,
- B);
- }
-
- // Load/store dual, load/store exclusive, table branch, otherwise.
- assert(slice(op2, 2, 2) == 1 && "Thumb2 encoding error!");
- if ((ARM::t2LDREX <= Opcode && Opcode <= ARM::t2LDREXH) ||
- (ARM::t2STREX <= Opcode && Opcode <= ARM::t2STREXH)) {
- // Load/store exclusive.
- return DisassembleThumb2LdStEx(MI, Opcode, insn, NumOps, NumOpsAdded,
- B);
- }
- if (Opcode == ARM::t2LDRDi8 ||
- Opcode == ARM::t2LDRD_PRE || Opcode == ARM::t2LDRD_POST ||
- Opcode == ARM::t2STRDi8 ||
- Opcode == ARM::t2STRD_PRE || Opcode == ARM::t2STRD_POST) {
- // Load/store dual.
- return DisassembleThumb2LdStDual(MI, Opcode, insn, NumOps, NumOpsAdded,
- B);
- }
- if (Opcode == ARM::t2TBB || Opcode == ARM::t2TBH) {
- // Table branch.
- return DisassembleThumb2TB(MI, Opcode, insn, NumOps, NumOpsAdded, B);
- }
- } else if (slice(op2, 6, 5) == 1) {
- // Data-processing (shifted register).
- return DisassembleThumb2DPSoReg(MI, Opcode, insn, NumOps, NumOpsAdded, B);
- }
-
- // FIXME: A6.3.18 Coprocessor instructions
- // But see ThumbDisassembler::getInstruction().
-
- break;
- case 2:
- if (op == 0) {
- if (slice(op2, 5, 5) == 0)
- // Data-processing (modified immediate)
- return DisassembleThumb2DPModImm(MI, Opcode, insn, NumOps, NumOpsAdded,
- B);
- if (Thumb2SaturateOpcode(Opcode))
- return DisassembleThumb2Sat(MI, Opcode, insn, NumOpsAdded, B);
-
- // Data-processing (plain binary immediate)
- return DisassembleThumb2DPBinImm(MI, Opcode, insn, NumOps, NumOpsAdded,
- B);
- }
- // Branches and miscellaneous control on page A6-20.
- return DisassembleThumb2BrMiscCtrl(MI, Opcode, insn, NumOps, NumOpsAdded,
- B);
- case 3:
- switch (slice(op2, 6, 5)) {
- case 0:
- // Load/store instructions...
- if (slice(op2, 0, 0) == 0) {
- if (slice(op2, 4, 4) == 0) {
- // Store single data item on page A6-30
- return DisassembleThumb2LdSt(false, MI,Opcode,insn,NumOps,NumOpsAdded,
- B);
- } else {
- // FIXME: Advanced SIMD element or structure load/store instructions.
- // But see ThumbDisassembler::getInstruction().
- ;
- }
- } else {
- // Table A6-9 32-bit Thumb instruction encoding: Load byte|halfword|word
- return DisassembleThumb2LdSt(true, MI, Opcode, insn, NumOps,
- NumOpsAdded, B);
- }
- break;
- case 1:
- if (slice(op2, 4, 4) == 0) {
- // A6.3.12 Data-processing (register)
- return DisassembleThumb2DPReg(MI, Opcode, insn, NumOps, NumOpsAdded, B);
- } else if (slice(op2, 3, 3) == 0) {
- // A6.3.16 Multiply, multiply accumulate, and absolute difference
- return DisassembleThumb2Mul(MI, Opcode, insn, NumOps, NumOpsAdded, B);
- } else {
- // A6.3.17 Long multiply, long multiply accumulate, and divide
- return DisassembleThumb2LongMul(MI, Opcode, insn, NumOps, NumOpsAdded,
- B);
- }
- break;
- default:
- // FIXME: A6.3.18 Coprocessor instructions
- // But see ThumbDisassembler::getInstruction().
- ;
- break;
- }
-
- break;
- default:
- assert(0 && "Thumb2 encoding error!");
- break;
- }
-
- return false;
-}
-
-static bool DisassembleThumbFrm(MCInst &MI, unsigned Opcode, uint32_t insn,
- unsigned short NumOps, unsigned &NumOpsAdded, BO Builder) {
-
- uint16_t HalfWord = slice(insn, 31, 16);
-
- if (HalfWord == 0) {
- // A6.2 16-bit Thumb instruction encoding
- // op = bits[15:10]
- uint16_t op = slice(insn, 15, 10);
- return DisassembleThumb1(op, MI, Opcode, insn, NumOps, NumOpsAdded,
- Builder);
- }
-
- unsigned bits15_11 = slice(HalfWord, 15, 11);
-
- // A6.1 Thumb instruction set encoding
- if (!(bits15_11 == 0x1D || bits15_11 == 0x1E || bits15_11 == 0x1F)) {
- assert("Bits[15:11] first halfword of Thumb2 instruction is out of range");
- return false;
- }
-
- // A6.3 32-bit Thumb instruction encoding
-
- uint16_t op1 = slice(HalfWord, 12, 11);
- uint16_t op2 = slice(HalfWord, 10, 4);
- uint16_t op = slice(insn, 15, 15);
-
- return DisassembleThumb2(op1, op2, op, MI, Opcode, insn, NumOps, NumOpsAdded,
- Builder);
-}
diff --git a/contrib/llvm/lib/Target/ARM/InstPrinter/ARMInstPrinter.cpp b/contrib/llvm/lib/Target/ARM/InstPrinter/ARMInstPrinter.cpp
index 78d3e47..ccdac3e 100644
--- a/contrib/llvm/lib/Target/ARM/InstPrinter/ARMInstPrinter.cpp
+++ b/contrib/llvm/lib/Target/ARM/InstPrinter/ARMInstPrinter.cpp
@@ -12,9 +12,9 @@
//===----------------------------------------------------------------------===//
#define DEBUG_TYPE "asm-printer"
-#include "ARMBaseInfo.h"
#include "ARMInstPrinter.h"
-#include "ARMAddressingModes.h"
+#include "MCTargetDesc/ARMBaseInfo.h"
+#include "MCTargetDesc/ARMAddressingModes.h"
#include "llvm/MC/MCInst.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCExpr.h"
@@ -25,6 +25,23 @@ using namespace llvm;
#define GET_INSTRUCTION_NAME
#include "ARMGenAsmWriter.inc"
+/// translateShiftImm - Convert shift immediate from 0-31 to 1-32 for printing.
+///
+/// getSORegOffset returns an integer from 0-31, representing '32' as 0.
+static unsigned translateShiftImm(unsigned imm) {
+ if (imm == 0)
+ return 32;
+ return imm;
+}
+
+
+ARMInstPrinter::ARMInstPrinter(const MCAsmInfo &MAI,
+ const MCSubtargetInfo &STI) :
+ MCInstPrinter(MAI) {
+ // Initialize the set of available features.
+ setAvailableFeatures(STI.getFeatureBits());
+}
+
StringRef ARMInstPrinter::getOpcodeName(unsigned Opcode) const {
return getInstructionName(Opcode);
}
@@ -33,11 +50,12 @@ void ARMInstPrinter::printRegName(raw_ostream &OS, unsigned RegNo) const {
OS << getRegisterName(RegNo);
}
-void ARMInstPrinter::printInst(const MCInst *MI, raw_ostream &O) {
+void ARMInstPrinter::printInst(const MCInst *MI, raw_ostream &O,
+ StringRef Annot) {
unsigned Opcode = MI->getOpcode();
// Check for MOVs and print canonical forms, instead.
- if (Opcode == ARM::MOVs) {
+ if (Opcode == ARM::MOVsr) {
// FIXME: Thumb variants?
const MCOperand &Dst = MI->getOperand(0);
const MCOperand &MO1 = MI->getOperand(1);
@@ -51,20 +69,36 @@ void ARMInstPrinter::printInst(const MCInst *MI, raw_ostream &O) {
O << '\t' << getRegisterName(Dst.getReg())
<< ", " << getRegisterName(MO1.getReg());
- if (ARM_AM::getSORegShOp(MO3.getImm()) == ARM_AM::rrx)
- return;
+ O << ", " << getRegisterName(MO2.getReg());
+ assert(ARM_AM::getSORegOffset(MO3.getImm()) == 0);
+ printAnnotation(O, Annot);
+ return;
+ }
- O << ", ";
+ if (Opcode == ARM::MOVsi) {
+ // FIXME: Thumb variants?
+ const MCOperand &Dst = MI->getOperand(0);
+ const MCOperand &MO1 = MI->getOperand(1);
+ const MCOperand &MO2 = MI->getOperand(2);
+
+ O << '\t' << ARM_AM::getShiftOpcStr(ARM_AM::getSORegShOp(MO2.getImm()));
+ printSBitModifierOperand(MI, 5, O);
+ printPredicateOperand(MI, 3, O);
- if (MO2.getReg()) {
- O << getRegisterName(MO2.getReg());
- assert(ARM_AM::getSORegOffset(MO3.getImm()) == 0);
- } else {
- O << "#" << ARM_AM::getSORegOffset(MO3.getImm());
+ O << '\t' << getRegisterName(Dst.getReg())
+ << ", " << getRegisterName(MO1.getReg());
+
+ if (ARM_AM::getSORegShOp(MO2.getImm()) == ARM_AM::rrx) {
+ printAnnotation(O, Annot);
+ return;
}
+
+ O << ", #" << translateShiftImm(ARM_AM::getSORegOffset(MO2.getImm()));
+ printAnnotation(O, Annot);
return;
}
+
// A8.6.123 PUSH
if ((Opcode == ARM::STMDB_UPD || Opcode == ARM::t2STMDB_UPD) &&
MI->getOperand(0).getReg() == ARM::SP) {
@@ -74,6 +108,15 @@ void ARMInstPrinter::printInst(const MCInst *MI, raw_ostream &O) {
O << ".w";
O << '\t';
printRegisterList(MI, 4, O);
+ printAnnotation(O, Annot);
+ return;
+ }
+ if (Opcode == ARM::STR_PRE_IMM && MI->getOperand(2).getReg() == ARM::SP &&
+ MI->getOperand(3).getImm() == -4) {
+ O << '\t' << "push";
+ printPredicateOperand(MI, 4, O);
+ O << "\t{" << getRegisterName(MI->getOperand(1).getReg()) << "}";
+ printAnnotation(O, Annot);
return;
}
@@ -86,8 +129,18 @@ void ARMInstPrinter::printInst(const MCInst *MI, raw_ostream &O) {
O << ".w";
O << '\t';
printRegisterList(MI, 4, O);
+ printAnnotation(O, Annot);
return;
}
+ if (Opcode == ARM::LDR_POST_IMM && MI->getOperand(2).getReg() == ARM::SP &&
+ MI->getOperand(4).getImm() == 4) {
+ O << '\t' << "pop";
+ printPredicateOperand(MI, 5, O);
+ O << "\t{" << getRegisterName(MI->getOperand(0).getReg()) << "}";
+ printAnnotation(O, Annot);
+ return;
+ }
+
// A8.6.355 VPUSH
if ((Opcode == ARM::VSTMSDB_UPD || Opcode == ARM::VSTMDDB_UPD) &&
@@ -96,6 +149,7 @@ void ARMInstPrinter::printInst(const MCInst *MI, raw_ostream &O) {
printPredicateOperand(MI, 2, O);
O << '\t';
printRegisterList(MI, 4, O);
+ printAnnotation(O, Annot);
return;
}
@@ -106,10 +160,40 @@ void ARMInstPrinter::printInst(const MCInst *MI, raw_ostream &O) {
printPredicateOperand(MI, 2, O);
O << '\t';
printRegisterList(MI, 4, O);
+ printAnnotation(O, Annot);
+ return;
+ }
+
+ if (Opcode == ARM::tLDMIA) {
+ bool Writeback = true;
+ unsigned BaseReg = MI->getOperand(0).getReg();
+ for (unsigned i = 3; i < MI->getNumOperands(); ++i) {
+ if (MI->getOperand(i).getReg() == BaseReg)
+ Writeback = false;
+ }
+
+ O << "\tldm";
+
+ printPredicateOperand(MI, 1, O);
+ O << '\t' << getRegisterName(BaseReg);
+ if (Writeback) O << "!";
+ O << ", ";
+ printRegisterList(MI, 3, O);
+ printAnnotation(O, Annot);
+ return;
+ }
+
+ // Thumb1 NOP
+ if (Opcode == ARM::tMOVr && MI->getOperand(0).getReg() == ARM::R8 &&
+ MI->getOperand(1).getReg() == ARM::R8) {
+ O << "\tnop";
+ printPredicateOperand(MI, 2, O);
+ printAnnotation(O, Annot);
return;
}
printInstruction(MI, O);
+ printAnnotation(O, Annot);
}
void ARMInstPrinter::printOperand(const MCInst *MI, unsigned OpNo,
@@ -122,16 +206,38 @@ void ARMInstPrinter::printOperand(const MCInst *MI, unsigned OpNo,
O << '#' << Op.getImm();
} else {
assert(Op.isExpr() && "unknown operand kind in printOperand");
- O << *Op.getExpr();
+ // If a symbolic branch target was added as a constant expression then print
+ // that address in hex.
+ const MCConstantExpr *BranchTarget = dyn_cast<MCConstantExpr>(Op.getExpr());
+ int64_t Address;
+ if (BranchTarget && BranchTarget->EvaluateAsAbsolute(Address)) {
+ O << "0x";
+ O.write_hex(Address);
+ }
+ else {
+ // Otherwise, just print the expression.
+ O << *Op.getExpr();
+ }
}
}
+void ARMInstPrinter::printT2LdrLabelOperand(const MCInst *MI, unsigned OpNum,
+ raw_ostream &O) {
+ const MCOperand &MO1 = MI->getOperand(OpNum);
+ if (MO1.isExpr())
+ O << *MO1.getExpr();
+ else if (MO1.isImm())
+ O << "[pc, #" << MO1.getImm() << "]";
+ else
+ llvm_unreachable("Unknown LDR label operand?");
+}
+
// so_reg is a 4-operand unit corresponding to register forms of the A5.1
// "Addressing Mode 1 - Data-processing operands" forms. This includes:
// REG 0 0 - e.g. R5
// REG REG 0,SH_OPC - e.g. R5, ROR R3
// REG 0 IMM,SH_OPC - e.g. R5, LSL #3
-void ARMInstPrinter::printSORegOperand(const MCInst *MI, unsigned OpNum,
+void ARMInstPrinter::printSORegRegOperand(const MCInst *MI, unsigned OpNum,
raw_ostream &O) {
const MCOperand &MO1 = MI->getOperand(OpNum);
const MCOperand &MO2 = MI->getOperand(OpNum+1);
@@ -144,14 +250,27 @@ void ARMInstPrinter::printSORegOperand(const MCInst *MI, unsigned OpNum,
O << ", " << ARM_AM::getShiftOpcStr(ShOpc);
if (ShOpc == ARM_AM::rrx)
return;
- if (MO2.getReg()) {
- O << ' ' << getRegisterName(MO2.getReg());
- assert(ARM_AM::getSORegOffset(MO3.getImm()) == 0);
- } else if (ShOpc != ARM_AM::rrx) {
- O << " #" << ARM_AM::getSORegOffset(MO3.getImm());
- }
+
+ O << ' ' << getRegisterName(MO2.getReg());
+ assert(ARM_AM::getSORegOffset(MO3.getImm()) == 0);
+}
+
+void ARMInstPrinter::printSORegImmOperand(const MCInst *MI, unsigned OpNum,
+ raw_ostream &O) {
+ const MCOperand &MO1 = MI->getOperand(OpNum);
+ const MCOperand &MO2 = MI->getOperand(OpNum+1);
+
+ O << getRegisterName(MO1.getReg());
+
+ // Print the shift opc.
+ ARM_AM::ShiftOpc ShOpc = ARM_AM::getSORegShOp(MO2.getImm());
+ O << ", " << ARM_AM::getShiftOpcStr(ShOpc);
+ if (ShOpc == ARM_AM::rrx)
+ return;
+ O << " #" << translateShiftImm(ARM_AM::getSORegOffset(MO2.getImm()));
}
+
//===--------------------------------------------------------------------===//
// Addressing Mode #2
//===--------------------------------------------------------------------===//
@@ -209,6 +328,22 @@ void ARMInstPrinter::printAM2PostIndexOp(const MCInst *MI, unsigned Op,
<< " #" << ShImm;
}
+void ARMInstPrinter::printAddrModeTBB(const MCInst *MI, unsigned Op,
+ raw_ostream &O) {
+ const MCOperand &MO1 = MI->getOperand(Op);
+ const MCOperand &MO2 = MI->getOperand(Op+1);
+ O << "[" << getRegisterName(MO1.getReg()) << ", "
+ << getRegisterName(MO2.getReg()) << "]";
+}
+
+void ARMInstPrinter::printAddrModeTBH(const MCInst *MI, unsigned Op,
+ raw_ostream &O) {
+ const MCOperand &MO1 = MI->getOperand(Op);
+ const MCOperand &MO2 = MI->getOperand(Op+1);
+ O << "[" << getRegisterName(MO1.getReg()) << ", "
+ << getRegisterName(MO2.getReg()) << ", lsl #1]";
+}
+
void ARMInstPrinter::printAddrMode2Operand(const MCInst *MI, unsigned Op,
raw_ostream &O) {
const MCOperand &MO1 = MI->getOperand(Op);
@@ -284,7 +419,7 @@ void ARMInstPrinter::printAM3PreOrOffsetIndexOp(const MCInst *MI, unsigned Op,
O << '[' << getRegisterName(MO1.getReg());
if (MO2.getReg()) {
- O << ", " << (char)ARM_AM::getAM3Op(MO3.getImm())
+ O << ", " << getAddrOpcStr(ARM_AM::getAM3Op(MO3.getImm()))
<< getRegisterName(MO2.getReg()) << ']';
return;
}
@@ -315,8 +450,8 @@ void ARMInstPrinter::printAddrMode3OffsetOperand(const MCInst *MI,
const MCOperand &MO2 = MI->getOperand(OpNum+1);
if (MO1.getReg()) {
- O << (char)ARM_AM::getAM3Op(MO2.getImm())
- << getRegisterName(MO1.getReg());
+ O << getAddrOpcStr(ARM_AM::getAM3Op(MO2.getImm()))
+ << getRegisterName(MO1.getReg());
return;
}
@@ -326,6 +461,31 @@ void ARMInstPrinter::printAddrMode3OffsetOperand(const MCInst *MI,
<< ImmOffs;
}
+void ARMInstPrinter::printPostIdxImm8Operand(const MCInst *MI,
+ unsigned OpNum,
+ raw_ostream &O) {
+ const MCOperand &MO = MI->getOperand(OpNum);
+ unsigned Imm = MO.getImm();
+ O << '#' << ((Imm & 256) ? "" : "-") << (Imm & 0xff);
+}
+
+void ARMInstPrinter::printPostIdxRegOperand(const MCInst *MI, unsigned OpNum,
+ raw_ostream &O) {
+ const MCOperand &MO1 = MI->getOperand(OpNum);
+ const MCOperand &MO2 = MI->getOperand(OpNum+1);
+
+ O << (MO2.getImm() ? "" : "-") << getRegisterName(MO1.getReg());
+}
+
+void ARMInstPrinter::printPostIdxImm8s4Operand(const MCInst *MI,
+ unsigned OpNum,
+ raw_ostream &O) {
+ const MCOperand &MO = MI->getOperand(OpNum);
+ unsigned Imm = MO.getImm();
+ O << '#' << ((Imm & 256) ? "" : "-") << ((Imm & 0xff) << 2);
+}
+
+
void ARMInstPrinter::printLdStmModeOperand(const MCInst *MI, unsigned OpNum,
raw_ostream &O) {
ARM_AM::AMSubMode Mode = ARM_AM::getAM4SubMode(MI->getOperand(OpNum)
@@ -345,7 +505,9 @@ void ARMInstPrinter::printAddrMode5Operand(const MCInst *MI, unsigned OpNum,
O << "[" << getRegisterName(MO1.getReg());
- if (unsigned ImmOffs = ARM_AM::getAM5Offset(MO2.getImm())) {
+ unsigned ImmOffs = ARM_AM::getAM5Offset(MO2.getImm());
+ unsigned Op = ARM_AM::getAM5Op(MO2.getImm());
+ if (ImmOffs || Op == ARM_AM::sub) {
O << ", #"
<< ARM_AM::getAddrOpcStr(ARM_AM::getAM5Op(MO2.getImm()))
<< ImmOffs * 4;
@@ -402,20 +564,31 @@ void ARMInstPrinter::printMemBOption(const MCInst *MI, unsigned OpNum,
void ARMInstPrinter::printShiftImmOperand(const MCInst *MI, unsigned OpNum,
raw_ostream &O) {
unsigned ShiftOp = MI->getOperand(OpNum).getImm();
- ARM_AM::ShiftOpc Opc = ARM_AM::getSORegShOp(ShiftOp);
- switch (Opc) {
- case ARM_AM::no_shift:
+ bool isASR = (ShiftOp & (1 << 5)) != 0;
+ unsigned Amt = ShiftOp & 0x1f;
+ if (isASR)
+ O << ", asr #" << (Amt == 0 ? 32 : Amt);
+ else if (Amt)
+ O << ", lsl #" << Amt;
+}
+
+void ARMInstPrinter::printPKHLSLShiftImm(const MCInst *MI, unsigned OpNum,
+ raw_ostream &O) {
+ unsigned Imm = MI->getOperand(OpNum).getImm();
+ if (Imm == 0)
return;
- case ARM_AM::lsl:
- O << ", lsl #";
- break;
- case ARM_AM::asr:
- O << ", asr #";
- break;
- default:
- assert(0 && "unexpected shift opcode for shift immediate operand");
- }
- O << ARM_AM::getSORegOffset(ShiftOp);
+ assert(Imm > 0 && Imm < 32 && "Invalid PKH shift immediate value!");
+ O << ", lsl #" << Imm;
+}
+
+void ARMInstPrinter::printPKHASRShiftImm(const MCInst *MI, unsigned OpNum,
+ raw_ostream &O) {
+ unsigned Imm = MI->getOperand(OpNum).getImm();
+ // A shift amount of 32 is encoded as 0.
+ if (Imm == 0)
+ Imm = 32;
+ assert(Imm > 0 && Imm <= 32 && "Invalid PKH shift immediate value!");
+ O << ", asr #" << Imm;
}
void ARMInstPrinter::printRegisterList(const MCInst *MI, unsigned OpNum,
@@ -450,6 +623,9 @@ void ARMInstPrinter::printCPSIFlag(const MCInst *MI, unsigned OpNum,
for (int i=2; i >= 0; --i)
if (IFlags & (1 << i))
O << ARM_PROC::IFlagsToString(1 << i);
+
+ if (IFlags == 0)
+ O << "none";
}
void ARMInstPrinter::printMSRMaskOperand(const MCInst *MI, unsigned OpNum,
@@ -458,10 +634,43 @@ void ARMInstPrinter::printMSRMaskOperand(const MCInst *MI, unsigned OpNum,
unsigned SpecRegRBit = Op.getImm() >> 4;
unsigned Mask = Op.getImm() & 0xf;
+ if (getAvailableFeatures() & ARM::FeatureMClass) {
+ switch (Op.getImm()) {
+ default: assert(0 && "Unexpected mask value!");
+ case 0: O << "apsr"; return;
+ case 1: O << "iapsr"; return;
+ case 2: O << "eapsr"; return;
+ case 3: O << "xpsr"; return;
+ case 5: O << "ipsr"; return;
+ case 6: O << "epsr"; return;
+ case 7: O << "iepsr"; return;
+ case 8: O << "msp"; return;
+ case 9: O << "psp"; return;
+ case 16: O << "primask"; return;
+ case 17: O << "basepri"; return;
+ case 18: O << "basepri_max"; return;
+ case 19: O << "faultmask"; return;
+ case 20: O << "control"; return;
+ }
+ }
+
+ // As special cases, CPSR_f, CPSR_s and CPSR_fs prefer printing as
+ // APSR_nzcvq, APSR_g and APSRnzcvqg, respectively.
+ if (!SpecRegRBit && (Mask == 8 || Mask == 4 || Mask == 12)) {
+ O << "APSR_";
+ switch (Mask) {
+ default: assert(0);
+ case 4: O << "g"; return;
+ case 8: O << "nzcvq"; return;
+ case 12: O << "nzcvqg"; return;
+ }
+ llvm_unreachable("Unexpected mask value!");
+ }
+
if (SpecRegRBit)
- O << "spsr";
+ O << "SPSR";
else
- O << "cpsr";
+ O << "CPSR";
if (Mask) {
O << '_';
@@ -501,15 +710,20 @@ void ARMInstPrinter::printNoHashImmediate(const MCInst *MI, unsigned OpNum,
}
void ARMInstPrinter::printPImmediate(const MCInst *MI, unsigned OpNum,
- raw_ostream &O) {
+ raw_ostream &O) {
O << "p" << MI->getOperand(OpNum).getImm();
}
void ARMInstPrinter::printCImmediate(const MCInst *MI, unsigned OpNum,
- raw_ostream &O) {
+ raw_ostream &O) {
O << "c" << MI->getOperand(OpNum).getImm();
}
+void ARMInstPrinter::printCoprocOptionImm(const MCInst *MI, unsigned OpNum,
+ raw_ostream &O) {
+ O << "{" << MI->getOperand(OpNum).getImm() << "}";
+}
+
void ARMInstPrinter::printPCLabel(const MCInst *MI, unsigned OpNum,
raw_ostream &O) {
llvm_unreachable("Unhandled PC-relative pseudo-instruction!");
@@ -517,7 +731,13 @@ void ARMInstPrinter::printPCLabel(const MCInst *MI, unsigned OpNum,
void ARMInstPrinter::printThumbS4ImmOperand(const MCInst *MI, unsigned OpNum,
raw_ostream &O) {
- O << "#" << MI->getOperand(OpNum).getImm() * 4;
+ O << "#" << MI->getOperand(OpNum).getImm() * 4;
+}
+
+void ARMInstPrinter::printThumbSRImm(const MCInst *MI, unsigned OpNum,
+ raw_ostream &O) {
+ unsigned Imm = MI->getOperand(OpNum).getImm();
+ O << "#" << (Imm == 0 ? 32 : Imm);
}
void ARMInstPrinter::printThumbITMask(const MCInst *MI, unsigned OpNum,
@@ -610,7 +830,7 @@ void ARMInstPrinter::printT2SOOperand(const MCInst *MI, unsigned OpNum,
ARM_AM::ShiftOpc ShOpc = ARM_AM::getSORegShOp(MO2.getImm());
O << ", " << ARM_AM::getShiftOpcStr(ShOpc);
if (ShOpc != ARM_AM::rrx)
- O << " #" << ARM_AM::getSORegOffset(MO2.getImm());
+ O << " #" << translateShiftImm(ARM_AM::getSORegOffset(MO2.getImm()));
}
void ARMInstPrinter::printAddrModeImm12Operand(const MCInst *MI, unsigned OpNum,
@@ -647,7 +867,9 @@ void ARMInstPrinter::printT2AddrModeImm8Operand(const MCInst *MI,
int32_t OffImm = (int32_t)MO2.getImm();
// Don't print +0.
- if (OffImm < 0)
+ if (OffImm == INT32_MIN)
+ O << ", #-0";
+ else if (OffImm < 0)
O << ", #-" << -OffImm;
else if (OffImm > 0)
O << ", #" << OffImm;
@@ -671,6 +893,18 @@ void ARMInstPrinter::printT2AddrModeImm8s4Operand(const MCInst *MI,
O << "]";
}
+void ARMInstPrinter::printT2AddrModeImm0_1020s4Operand(const MCInst *MI,
+ unsigned OpNum,
+ raw_ostream &O) {
+ const MCOperand &MO1 = MI->getOperand(OpNum);
+ const MCOperand &MO2 = MI->getOperand(OpNum+1);
+
+ O << "[" << getRegisterName(MO1.getReg());
+ if (MO2.getImm())
+ O << ", #" << MO2.getImm() * 4;
+ O << "]";
+}
+
void ARMInstPrinter::printT2AddrModeImm8OffsetOperand(const MCInst *MI,
unsigned OpNum,
raw_ostream &O) {
@@ -678,9 +912,9 @@ void ARMInstPrinter::printT2AddrModeImm8OffsetOperand(const MCInst *MI,
int32_t OffImm = (int32_t)MO1.getImm();
// Don't print +0.
if (OffImm < 0)
- O << "#-" << -OffImm;
- else if (OffImm > 0)
- O << "#" << OffImm;
+ O << ", #-" << -OffImm;
+ else
+ O << ", #" << OffImm;
}
void ARMInstPrinter::printT2AddrModeImm8s4OffsetOperand(const MCInst *MI,
@@ -689,10 +923,13 @@ void ARMInstPrinter::printT2AddrModeImm8s4OffsetOperand(const MCInst *MI,
const MCOperand &MO1 = MI->getOperand(OpNum);
int32_t OffImm = (int32_t)MO1.getImm() / 4;
// Don't print +0.
- if (OffImm < 0)
- O << "#-" << -OffImm * 4;
- else if (OffImm > 0)
- O << "#" << OffImm * 4;
+ if (OffImm != 0) {
+ O << ", ";
+ if (OffImm < 0)
+ O << "#-" << -OffImm * 4;
+ else if (OffImm > 0)
+ O << "#" << OffImm * 4;
+ }
}
void ARMInstPrinter::printT2AddrModeSoRegOperand(const MCInst *MI,
@@ -715,39 +952,10 @@ void ARMInstPrinter::printT2AddrModeSoRegOperand(const MCInst *MI,
O << "]";
}
-void ARMInstPrinter::printVFPf32ImmOperand(const MCInst *MI, unsigned OpNum,
- raw_ostream &O) {
- const MCOperand &MO = MI->getOperand(OpNum);
- O << '#';
- if (MO.isFPImm()) {
- O << (float)MO.getFPImm();
- } else {
- union {
- uint32_t I;
- float F;
- } FPUnion;
-
- FPUnion.I = MO.getImm();
- O << FPUnion.F;
- }
-}
-
-void ARMInstPrinter::printVFPf64ImmOperand(const MCInst *MI, unsigned OpNum,
- raw_ostream &O) {
+void ARMInstPrinter::printFPImmOperand(const MCInst *MI, unsigned OpNum,
+ raw_ostream &O) {
const MCOperand &MO = MI->getOperand(OpNum);
- O << '#';
- if (MO.isFPImm()) {
- O << MO.getFPImm();
- } else {
- // We expect the binary encoding of a floating point number here.
- union {
- uint64_t I;
- double D;
- } FPUnion;
-
- FPUnion.I = MO.getImm();
- O << FPUnion.D;
- }
+ O << '#' << ARM_AM::getFPImmFloat(MO.getImm());
}
void ARMInstPrinter::printNEONModImmOperand(const MCInst *MI, unsigned OpNum,
@@ -757,3 +965,28 @@ void ARMInstPrinter::printNEONModImmOperand(const MCInst *MI, unsigned OpNum,
uint64_t Val = ARM_AM::decodeNEONModImm(EncodedImm, EltBits);
O << "#0x" << utohexstr(Val);
}
+
+void ARMInstPrinter::printImmPlusOneOperand(const MCInst *MI, unsigned OpNum,
+ raw_ostream &O) {
+ unsigned Imm = MI->getOperand(OpNum).getImm();
+ O << "#" << Imm + 1;
+}
+
+void ARMInstPrinter::printRotImmOperand(const MCInst *MI, unsigned OpNum,
+ raw_ostream &O) {
+ unsigned Imm = MI->getOperand(OpNum).getImm();
+ if (Imm == 0)
+ return;
+ O << ", ror #";
+ switch (Imm) {
+ default: assert (0 && "illegal ror immediate!");
+ case 1: O << "8"; break;
+ case 2: O << "16"; break;
+ case 3: O << "24"; break;
+ }
+}
+
+void ARMInstPrinter::printVectorIndex(const MCInst *MI, unsigned OpNum,
+ raw_ostream &O) {
+ O << "[" << MI->getOperand(OpNum).getImm() << "]";
+}
diff --git a/contrib/llvm/lib/Target/ARM/InstPrinter/ARMInstPrinter.h b/contrib/llvm/lib/Target/ARM/InstPrinter/ARMInstPrinter.h
index d5f238b..5c2173f 100644
--- a/contrib/llvm/lib/Target/ARM/InstPrinter/ARMInstPrinter.h
+++ b/contrib/llvm/lib/Target/ARM/InstPrinter/ARMInstPrinter.h
@@ -15,6 +15,7 @@
#define ARMINSTPRINTER_H
#include "llvm/MC/MCInstPrinter.h"
+#include "llvm/MC/MCSubtargetInfo.h"
namespace llvm {
@@ -22,10 +23,9 @@ class MCOperand;
class ARMInstPrinter : public MCInstPrinter {
public:
- ARMInstPrinter(const MCAsmInfo &MAI)
- : MCInstPrinter(MAI) {}
+ ARMInstPrinter(const MCAsmInfo &MAI, const MCSubtargetInfo &STI);
- virtual void printInst(const MCInst *MI, raw_ostream &O);
+ virtual void printInst(const MCInst *MI, raw_ostream &O, StringRef Annot);
virtual StringRef getOpcodeName(unsigned Opcode) const;
virtual void printRegName(raw_ostream &OS, unsigned RegNo) const;
@@ -38,8 +38,11 @@ public:
void printOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O);
- void printSORegOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O);
+ void printSORegRegOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O);
+ void printSORegImmOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O);
+ void printAddrModeTBB(const MCInst *MI, unsigned OpNum, raw_ostream &O);
+ void printAddrModeTBH(const MCInst *MI, unsigned OpNum, raw_ostream &O);
void printAddrMode2Operand(const MCInst *MI, unsigned OpNum, raw_ostream &O);
void printAM2PostIndexOp(const MCInst *MI, unsigned OpNum, raw_ostream &O);
void printAM2PreOrOffsetIndexOp(const MCInst *MI, unsigned OpNum,
@@ -48,11 +51,15 @@ public:
raw_ostream &O);
void printAddrMode3Operand(const MCInst *MI, unsigned OpNum, raw_ostream &O);
- void printAM3PostIndexOp(const MCInst *MI, unsigned OpNum, raw_ostream &O);
- void printAM3PreOrOffsetIndexOp(const MCInst *MI, unsigned OpNum,
- raw_ostream &O);
void printAddrMode3OffsetOperand(const MCInst *MI, unsigned OpNum,
raw_ostream &O);
+ void printAM3PostIndexOp(const MCInst *MI, unsigned Op, raw_ostream &O);
+ void printAM3PreOrOffsetIndexOp(const MCInst *MI, unsigned Op,raw_ostream &O);
+ void printPostIdxImm8Operand(const MCInst *MI, unsigned OpNum,
+ raw_ostream &O);
+ void printPostIdxRegOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O);
+ void printPostIdxImm8s4Operand(const MCInst *MI, unsigned OpNum,
+ raw_ostream &O);
void printLdStmModeOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O);
void printAddrMode5Operand(const MCInst *MI, unsigned OpNum, raw_ostream &O);
@@ -65,8 +72,11 @@ public:
raw_ostream &O);
void printMemBOption(const MCInst *MI, unsigned OpNum, raw_ostream &O);
void printShiftImmOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O);
+ void printPKHLSLShiftImm(const MCInst *MI, unsigned OpNum, raw_ostream &O);
+ void printPKHASRShiftImm(const MCInst *MI, unsigned OpNum, raw_ostream &O);
void printThumbS4ImmOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O);
+ void printThumbSRImm(const MCInst *MI, unsigned OpNum, raw_ostream &O);
void printThumbITMask(const MCInst *MI, unsigned OpNum, raw_ostream &O);
void printThumbAddrModeRROperand(const MCInst *MI, unsigned OpNum,
raw_ostream &O);
@@ -88,6 +98,8 @@ public:
raw_ostream &O);
void printT2AddrModeImm8s4Operand(const MCInst *MI, unsigned OpNum,
raw_ostream &O);
+ void printT2AddrModeImm0_1020s4Operand(const MCInst *MI, unsigned OpNum,
+ raw_ostream &O);
void printT2AddrModeImm8OffsetOperand(const MCInst *MI, unsigned OpNum,
raw_ostream &O);
void printT2AddrModeImm8s4OffsetOperand(const MCInst *MI, unsigned OpNum,
@@ -108,11 +120,15 @@ public:
void printNoHashImmediate(const MCInst *MI, unsigned OpNum, raw_ostream &O);
void printPImmediate(const MCInst *MI, unsigned OpNum, raw_ostream &O);
void printCImmediate(const MCInst *MI, unsigned OpNum, raw_ostream &O);
- void printVFPf32ImmOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O);
- void printVFPf64ImmOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O);
+ void printCoprocOptionImm(const MCInst *MI, unsigned OpNum, raw_ostream &O);
+ void printFPImmOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O);
void printNEONModImmOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O);
+ void printImmPlusOneOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O);
+ void printRotImmOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O);
void printPCLabel(const MCInst *MI, unsigned OpNum, raw_ostream &O);
+ void printT2LdrLabelOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O);
+ void printVectorIndex(const MCInst *MI, unsigned OpNum, raw_ostream &O);
};
} // end namespace llvm
diff --git a/contrib/llvm/lib/Target/ARM/InstPrinter/CMakeLists.txt b/contrib/llvm/lib/Target/ARM/InstPrinter/CMakeLists.txt
deleted file mode 100644
index 18645c0..0000000
--- a/contrib/llvm/lib/Target/ARM/InstPrinter/CMakeLists.txt
+++ /dev/null
@@ -1,6 +0,0 @@
-include_directories( ${CMAKE_CURRENT_BINARY_DIR}/.. ${CMAKE_CURRENT_SOURCE_DIR}/.. )
-
-add_llvm_library(LLVMARMAsmPrinter
- ARMInstPrinter.cpp
- )
-add_dependencies(LLVMARMAsmPrinter ARMCodeGenTable_gen)
diff --git a/contrib/llvm/lib/Target/ARM/InstPrinter/Makefile b/contrib/llvm/lib/Target/ARM/InstPrinter/Makefile
deleted file mode 100644
index 65d372e..0000000
--- a/contrib/llvm/lib/Target/ARM/InstPrinter/Makefile
+++ /dev/null
@@ -1,15 +0,0 @@
-##===- lib/Target/ARM/AsmPrinter/Makefile ------------------*- Makefile -*-===##
-#
-# The LLVM Compiler Infrastructure
-#
-# This file is distributed under the University of Illinois Open Source
-# License. See LICENSE.TXT for details.
-#
-##===----------------------------------------------------------------------===##
-LEVEL = ../../../..
-LIBRARYNAME = LLVMARMAsmPrinter
-
-# Hack: we need to include 'main' arm target directory to grab private headers
-CPP.Flags += -I$(PROJ_OBJ_DIR)/.. -I$(PROJ_SRC_DIR)/..
-
-include $(LEVEL)/Makefile.common
diff --git a/contrib/llvm/lib/Target/ARM/ARMAddressingModes.h b/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMAddressingModes.h
index 595708f..9982fa6 100644
--- a/contrib/llvm/lib/Target/ARM/ARMAddressingModes.h
+++ b/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMAddressingModes.h
@@ -14,7 +14,8 @@
#ifndef LLVM_TARGET_ARM_ARMADDRESSINGMODES_H
#define LLVM_TARGET_ARM_ARMADDRESSINGMODES_H
-#include "llvm/CodeGen/SelectionDAGNodes.h"
+#include "llvm/ADT/APFloat.h"
+#include "llvm/ADT/APInt.h"
#include "llvm/Support/MathExtras.h"
#include <cassert>
@@ -32,7 +33,8 @@ namespace ARM_AM {
};
enum AddrOpc {
- add = '+', sub = '-'
+ sub = 0,
+ add
};
static inline const char *getAddrOpcStr(AddrOpc Op) {
@@ -60,20 +62,6 @@ namespace ARM_AM {
}
}
- static inline ShiftOpc getShiftOpcForNode(SDValue N) {
- switch (N.getOpcode()) {
- default: return ARM_AM::no_shift;
- case ISD::SHL: return ARM_AM::lsl;
- case ISD::SRL: return ARM_AM::lsr;
- case ISD::SRA: return ARM_AM::asr;
- case ISD::ROTR: return ARM_AM::ror;
- //case ISD::ROTL: // Only if imm -> turn into ROTR.
- // Can't handle RRX here, because it would require folding a flag into
- // the addressing mode. :( This causes us to miss certain things.
- //case ARMISD::RRX: return ARM_AM::rrx;
- }
- }
-
enum AMSubMode {
bad_am_submode = 0,
ia,
@@ -588,6 +576,90 @@ namespace ARM_AM {
AMSubMode getLoadStoreMultipleSubMode(int Opcode);
+ //===--------------------------------------------------------------------===//
+ // Floating-point Immediates
+ //
+ static inline float getFPImmFloat(unsigned Imm) {
+ // We expect an 8-bit binary encoding of a floating-point number here.
+ union {
+ uint32_t I;
+ float F;
+ } FPUnion;
+
+ uint8_t Sign = (Imm >> 7) & 0x1;
+ uint8_t Exp = (Imm >> 4) & 0x7;
+ uint8_t Mantissa = Imm & 0xf;
+
+ // 8-bit FP iEEEE Float Encoding
+ // abcd efgh aBbbbbbc defgh000 00000000 00000000
+ //
+ // where B = NOT(b);
+
+ FPUnion.I = 0;
+ FPUnion.I |= Sign << 31;
+ FPUnion.I |= ((Exp & 0x4) != 0 ? 0 : 1) << 30;
+ FPUnion.I |= ((Exp & 0x4) != 0 ? 0x1f : 0) << 25;
+ FPUnion.I |= (Exp & 0x3) << 23;
+ FPUnion.I |= Mantissa << 19;
+ return FPUnion.F;
+ }
+
+ /// getFP32Imm - Return an 8-bit floating-point version of the 32-bit
+ /// floating-point value. If the value cannot be represented as an 8-bit
+ /// floating-point value, then return -1.
+ static inline int getFP32Imm(const APInt &Imm) {
+ uint32_t Sign = Imm.lshr(31).getZExtValue() & 1;
+ int32_t Exp = (Imm.lshr(23).getSExtValue() & 0xff) - 127; // -126 to 127
+ int64_t Mantissa = Imm.getZExtValue() & 0x7fffff; // 23 bits
+
+ // We can handle 4 bits of mantissa.
+ // mantissa = (16+UInt(e:f:g:h))/16.
+ if (Mantissa & 0x7ffff)
+ return -1;
+ Mantissa >>= 19;
+ if ((Mantissa & 0xf) != Mantissa)
+ return -1;
+
+ // We can handle 3 bits of exponent: exp == UInt(NOT(b):c:d)-3
+ if (Exp < -3 || Exp > 4)
+ return -1;
+ Exp = ((Exp+3) & 0x7) ^ 4;
+
+ return ((int)Sign << 7) | (Exp << 4) | Mantissa;
+ }
+
+ static inline int getFP32Imm(const APFloat &FPImm) {
+ return getFP32Imm(FPImm.bitcastToAPInt());
+ }
+
+ /// getFP64Imm - Return an 8-bit floating-point version of the 64-bit
+ /// floating-point value. If the value cannot be represented as an 8-bit
+ /// floating-point value, then return -1.
+ static inline int getFP64Imm(const APInt &Imm) {
+ uint64_t Sign = Imm.lshr(63).getZExtValue() & 1;
+ int64_t Exp = (Imm.lshr(52).getSExtValue() & 0x7ff) - 1023; // -1022 to 1023
+ uint64_t Mantissa = Imm.getZExtValue() & 0xfffffffffffffULL;
+
+ // We can handle 4 bits of mantissa.
+ // mantissa = (16+UInt(e:f:g:h))/16.
+ if (Mantissa & 0xffffffffffffULL)
+ return -1;
+ Mantissa >>= 48;
+ if ((Mantissa & 0xf) != Mantissa)
+ return -1;
+
+ // We can handle 3 bits of exponent: exp == UInt(NOT(b):c:d)-3
+ if (Exp < -3 || Exp > 4)
+ return -1;
+ Exp = ((Exp+3) & 0x7) ^ 4;
+
+ return ((int)Sign << 7) | (Exp << 4) | Mantissa;
+ }
+
+ static inline int getFP64Imm(const APFloat &FPImm) {
+ return getFP64Imm(FPImm.bitcastToAPInt());
+ }
+
} // end namespace ARM_AM
} // end namespace llvm
diff --git a/contrib/llvm/lib/Target/ARM/ARMAsmBackend.cpp b/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.cpp
index 5e438a9..c31c5e6 100644
--- a/contrib/llvm/lib/Target/ARM/ARMAsmBackend.cpp
+++ b/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.cpp
@@ -7,9 +7,10 @@
//
//===----------------------------------------------------------------------===//
-#include "ARM.h"
-#include "ARMAddressingModes.h"
-#include "ARMFixupKinds.h"
+#include "MCTargetDesc/ARMMCTargetDesc.h"
+#include "MCTargetDesc/ARMBaseInfo.h"
+#include "MCTargetDesc/ARMFixupKinds.h"
+#include "MCTargetDesc/ARMAddressingModes.h"
#include "llvm/ADT/Twine.h"
#include "llvm/MC/MCAssembler.h"
#include "llvm/MC/MCDirectives.h"
@@ -19,12 +20,12 @@
#include "llvm/MC/MCObjectWriter.h"
#include "llvm/MC/MCSectionELF.h"
#include "llvm/MC/MCSectionMachO.h"
+#include "llvm/MC/MCAsmBackend.h"
+#include "llvm/MC/MCSubtargetInfo.h"
#include "llvm/Object/MachOFormat.h"
#include "llvm/Support/ELF.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
-#include "llvm/Target/TargetAsmBackend.h"
-#include "llvm/Target/TargetRegistry.h"
using namespace llvm;
namespace {
@@ -35,13 +36,24 @@ public:
/*HasRelocationAddend*/ false) {}
};
-class ARMAsmBackend : public TargetAsmBackend {
+class ARMAsmBackend : public MCAsmBackend {
+ const MCSubtargetInfo* STI;
bool isThumbMode; // Currently emitting Thumb code.
public:
- ARMAsmBackend(const Target &T) : TargetAsmBackend(), isThumbMode(false) {}
+ ARMAsmBackend(const Target &T, const StringRef TT)
+ : MCAsmBackend(), STI(ARM_MC::createARMMCSubtargetInfo(TT, "", "")),
+ isThumbMode(TT.startswith("thumb")) {}
+
+ ~ARMAsmBackend() {
+ delete STI;
+ }
unsigned getNumFixupKinds() const { return ARM::NumTargetFixupKinds; }
+ bool hasNOP() const {
+ return (STI->getFeatureBits() & ARM::HasV6T2Ops) != 0;
+ }
+
const MCFixupKindInfo &getFixupKindInfo(MCFixupKind Kind) const {
const static MCFixupKindInfo Infos[ARM::NumTargetFixupKinds] = {
// This table *must* be in the order that the fixup_* kinds are defined in
@@ -65,9 +77,9 @@ public:
{ "fixup_t2_uncondbranch", 0, 32, MCFixupKindInfo::FKF_IsPCRel },
{ "fixup_arm_thumb_br", 0, 16, MCFixupKindInfo::FKF_IsPCRel },
{ "fixup_arm_thumb_bl", 0, 32, MCFixupKindInfo::FKF_IsPCRel },
-{ "fixup_arm_thumb_blx", 7, 21, MCFixupKindInfo::FKF_IsPCRel },
+{ "fixup_arm_thumb_blx", 0, 32, MCFixupKindInfo::FKF_IsPCRel },
{ "fixup_arm_thumb_cb", 0, 16, MCFixupKindInfo::FKF_IsPCRel },
-{ "fixup_arm_thumb_cp", 1, 8, MCFixupKindInfo::FKF_IsPCRel },
+{ "fixup_arm_thumb_cp", 0, 8, MCFixupKindInfo::FKF_IsPCRel },
{ "fixup_arm_thumb_bcc", 0, 8, MCFixupKindInfo::FKF_IsPCRel },
// movw / movt: 16-bits immediate but scattered into two chunks 0 - 12, 16 - 19.
{ "fixup_arm_movt_hi16", 0, 20, 0 },
@@ -81,7 +93,7 @@ public:
};
if (Kind < FirstTargetFixupKind)
- return TargetAsmBackend::getFixupKindInfo(Kind);
+ return MCAsmBackend::getFixupKindInfo(Kind);
assert(unsigned(Kind - FirstTargetFixupKind) < getNumFixupKinds() &&
"Invalid kind!");
@@ -123,20 +135,28 @@ void ARMAsmBackend::RelaxInstruction(const MCInst &Inst, MCInst &Res) const {
}
bool ARMAsmBackend::WriteNopData(uint64_t Count, MCObjectWriter *OW) const {
+ const uint16_t Thumb1_16bitNopEncoding = 0x46c0; // using MOV r8,r8
+ const uint16_t Thumb2_16bitNopEncoding = 0xbf00; // NOP
+ const uint32_t ARMv4_NopEncoding = 0xe1a0000; // using MOV r0,r0
+ const uint32_t ARMv6T2_NopEncoding = 0xe3207800; // NOP
if (isThumb()) {
- // FIXME: 0xbf00 is the ARMv7 value. For v6 and before, we'll need to
- // use 0x46c0 (which is a 'mov r8, r8' insn).
+ const uint16_t nopEncoding = hasNOP() ? Thumb2_16bitNopEncoding
+ : Thumb1_16bitNopEncoding;
uint64_t NumNops = Count / 2;
for (uint64_t i = 0; i != NumNops; ++i)
- OW->Write16(0xbf00);
+ OW->Write16(nopEncoding);
if (Count & 1)
OW->Write8(0);
return true;
}
// ARM mode
+ const uint32_t nopEncoding = hasNOP() ? ARMv6T2_NopEncoding
+ : ARMv4_NopEncoding;
uint64_t NumNops = Count / 4;
for (uint64_t i = 0; i != NumNops; ++i)
- OW->Write32(0xe1a00000);
+ OW->Write32(nopEncoding);
+ // FIXME: should this function return false when unable to write exactly
+ // 'Count' bytes with NOP encodings?
switch (Count % 4) {
default: break; // No leftover bytes to write
case 1: OW->Write8(0); break;
@@ -163,8 +183,6 @@ static unsigned adjustFixupValue(unsigned Kind, uint64_t Value) {
case ARM::fixup_arm_movw_lo16_pcrel: {
unsigned Hi4 = (Value & 0xF000) >> 12;
unsigned Lo12 = Value & 0x0FFF;
- assert ((((int64_t)Value) >= -0x8000) && (((int64_t)Value) <= 0x7fff) &&
- "Out of range pc-relative fixup value!");
// inst{19-16} = Hi4;
// inst{11-0} = Lo12;
Value = (Hi4 << 16) | (Lo12);
@@ -185,10 +203,6 @@ static unsigned adjustFixupValue(unsigned Kind, uint64_t Value) {
// inst{26} = i;
// inst{14-12} = Mid3;
// inst{7-0} = Lo8;
- // The value comes in as the whole thing, not just the portion required
- // for this fixup, so we need to mask off the bits not handled by this
- // portion (lo vs. hi).
- Value &= 0xffff;
Value = (Hi4 << 16) | (i << 26) | (Mid3 << 12) | (Lo8);
uint64_t swapped = (Value & 0xFFFF0000) >> 16;
swapped |= (Value & 0x0000FFFF) << 16;
@@ -382,8 +396,9 @@ namespace {
class ELFARMAsmBackend : public ARMAsmBackend {
public:
Triple::OSType OSType;
- ELFARMAsmBackend(const Target &T, Triple::OSType _OSType)
- : ARMAsmBackend(T), OSType(_OSType) { }
+ ELFARMAsmBackend(const Target &T, const StringRef TT,
+ Triple::OSType _OSType)
+ : ARMAsmBackend(T, TT), OSType(_OSType) { }
void ApplyFixup(const MCFixup &Fixup, char *Data, unsigned DataSize,
uint64_t Value) const;
@@ -414,8 +429,9 @@ void ELFARMAsmBackend::ApplyFixup(const MCFixup &Fixup, char *Data,
class DarwinARMAsmBackend : public ARMAsmBackend {
public:
const object::mach::CPUSubtypeARM Subtype;
- DarwinARMAsmBackend(const Target &T, object::mach::CPUSubtypeARM st)
- : ARMAsmBackend(T), Subtype(st) { }
+ DarwinARMAsmBackend(const Target &T, const StringRef TT,
+ object::mach::CPUSubtypeARM st)
+ : ARMAsmBackend(T, TT), Subtype(st) { }
MCObjectWriter *createObjectWriter(raw_ostream &OS) const {
return createARMMachObjectWriter(OS, /*Is64Bit=*/false,
@@ -492,25 +508,24 @@ void DarwinARMAsmBackend::ApplyFixup(const MCFixup &Fixup, char *Data,
} // end anonymous namespace
-TargetAsmBackend *llvm::createARMAsmBackend(const Target &T,
- const std::string &TT) {
+MCAsmBackend *llvm::createARMAsmBackend(const Target &T, StringRef TT) {
Triple TheTriple(TT);
if (TheTriple.isOSDarwin()) {
if (TheTriple.getArchName() == "armv4t" ||
TheTriple.getArchName() == "thumbv4t")
- return new DarwinARMAsmBackend(T, object::mach::CSARM_V4T);
+ return new DarwinARMAsmBackend(T, TT, object::mach::CSARM_V4T);
else if (TheTriple.getArchName() == "armv5e" ||
TheTriple.getArchName() == "thumbv5e")
- return new DarwinARMAsmBackend(T, object::mach::CSARM_V5TEJ);
+ return new DarwinARMAsmBackend(T, TT, object::mach::CSARM_V5TEJ);
else if (TheTriple.getArchName() == "armv6" ||
TheTriple.getArchName() == "thumbv6")
- return new DarwinARMAsmBackend(T, object::mach::CSARM_V6);
- return new DarwinARMAsmBackend(T, object::mach::CSARM_V7);
+ return new DarwinARMAsmBackend(T, TT, object::mach::CSARM_V6);
+ return new DarwinARMAsmBackend(T, TT, object::mach::CSARM_V7);
}
if (TheTriple.isOSWindows())
assert(0 && "Windows not supported on ARM");
- return new ELFARMAsmBackend(T, Triple(TT).getOS());
+ return new ELFARMAsmBackend(T, TT, Triple(TT).getOS());
}
diff --git a/contrib/llvm/lib/Target/ARM/ARMBaseInfo.h b/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMBaseInfo.h
index 458f7dd..ec4b6ff 100644
--- a/contrib/llvm/lib/Target/ARM/ARMBaseInfo.h
+++ b/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMBaseInfo.h
@@ -17,12 +17,9 @@
#ifndef ARMBASEINFO_H
#define ARMBASEINFO_H
-#include "MCTargetDesc/ARMMCTargetDesc.h"
+#include "ARMMCTargetDesc.h"
#include "llvm/Support/ErrorHandling.h"
-// Note that the following auto-generated files only defined enum types, and
-// so are safe to include here.
-
namespace llvm {
// Enums corresponding to ARM condition codes
@@ -191,6 +188,22 @@ inline static unsigned getARMRegisterNumbering(unsigned Reg) {
}
}
+/// isARMLowRegister - Returns true if the register is a low register (r0-r7).
+///
+static inline bool isARMLowRegister(unsigned Reg) {
+ using namespace ARM;
+ switch (Reg) {
+ case R0: case R1: case R2: case R3:
+ case R4: case R5: case R6: case R7:
+ return true;
+ default:
+ return false;
+ }
+}
+
+/// ARMII - This namespace holds all of the target specific flags that
+/// instruction info tracks.
+///
namespace ARMII {
/// ARM Index Modes
@@ -287,6 +300,148 @@ namespace ARMII {
/// call operand.
MO_PLT
};
+
+ enum {
+ //===------------------------------------------------------------------===//
+ // Instruction Flags.
+
+ //===------------------------------------------------------------------===//
+ // This four-bit field describes the addressing mode used.
+ AddrModeMask = 0x1f, // The AddrMode enums are declared in ARMBaseInfo.h
+
+ // IndexMode - Unindex, pre-indexed, or post-indexed are valid for load
+ // and store ops only. Generic "updating" flag is used for ld/st multiple.
+ // The index mode enums are declared in ARMBaseInfo.h
+ IndexModeShift = 5,
+ IndexModeMask = 3 << IndexModeShift,
+
+ //===------------------------------------------------------------------===//
+ // Instruction encoding formats.
+ //
+ FormShift = 7,
+ FormMask = 0x3f << FormShift,
+
+ // Pseudo instructions
+ Pseudo = 0 << FormShift,
+
+ // Multiply instructions
+ MulFrm = 1 << FormShift,
+
+ // Branch instructions
+ BrFrm = 2 << FormShift,
+ BrMiscFrm = 3 << FormShift,
+
+ // Data Processing instructions
+ DPFrm = 4 << FormShift,
+ DPSoRegFrm = 5 << FormShift,
+
+ // Load and Store
+ LdFrm = 6 << FormShift,
+ StFrm = 7 << FormShift,
+ LdMiscFrm = 8 << FormShift,
+ StMiscFrm = 9 << FormShift,
+ LdStMulFrm = 10 << FormShift,
+
+ LdStExFrm = 11 << FormShift,
+
+ // Miscellaneous arithmetic instructions
+ ArithMiscFrm = 12 << FormShift,
+ SatFrm = 13 << FormShift,
+
+ // Extend instructions
+ ExtFrm = 14 << FormShift,
+
+ // VFP formats
+ VFPUnaryFrm = 15 << FormShift,
+ VFPBinaryFrm = 16 << FormShift,
+ VFPConv1Frm = 17 << FormShift,
+ VFPConv2Frm = 18 << FormShift,
+ VFPConv3Frm = 19 << FormShift,
+ VFPConv4Frm = 20 << FormShift,
+ VFPConv5Frm = 21 << FormShift,
+ VFPLdStFrm = 22 << FormShift,
+ VFPLdStMulFrm = 23 << FormShift,
+ VFPMiscFrm = 24 << FormShift,
+
+ // Thumb format
+ ThumbFrm = 25 << FormShift,
+
+ // Miscelleaneous format
+ MiscFrm = 26 << FormShift,
+
+ // NEON formats
+ NGetLnFrm = 27 << FormShift,
+ NSetLnFrm = 28 << FormShift,
+ NDupFrm = 29 << FormShift,
+ NLdStFrm = 30 << FormShift,
+ N1RegModImmFrm= 31 << FormShift,
+ N2RegFrm = 32 << FormShift,
+ NVCVTFrm = 33 << FormShift,
+ NVDupLnFrm = 34 << FormShift,
+ N2RegVShLFrm = 35 << FormShift,
+ N2RegVShRFrm = 36 << FormShift,
+ N3RegFrm = 37 << FormShift,
+ N3RegVShFrm = 38 << FormShift,
+ NVExtFrm = 39 << FormShift,
+ NVMulSLFrm = 40 << FormShift,
+ NVTBLFrm = 41 << FormShift,
+
+ //===------------------------------------------------------------------===//
+ // Misc flags.
+
+ // UnaryDP - Indicates this is a unary data processing instruction, i.e.
+ // it doesn't have a Rn operand.
+ UnaryDP = 1 << 13,
+
+ // Xform16Bit - Indicates this Thumb2 instruction may be transformed into
+ // a 16-bit Thumb instruction if certain conditions are met.
+ Xform16Bit = 1 << 14,
+
+ // ThumbArithFlagSetting - The instruction is a 16-bit flag setting Thumb
+ // instruction. Used by the parser to determine whether to require the 'S'
+ // suffix on the mnemonic (when not in an IT block) or preclude it (when
+ // in an IT block).
+ ThumbArithFlagSetting = 1 << 18,
+
+ //===------------------------------------------------------------------===//
+ // Code domain.
+ DomainShift = 15,
+ DomainMask = 7 << DomainShift,
+ DomainGeneral = 0 << DomainShift,
+ DomainVFP = 1 << DomainShift,
+ DomainNEON = 2 << DomainShift,
+ DomainNEONA8 = 4 << DomainShift,
+
+ //===------------------------------------------------------------------===//
+ // Field shifts - such shifts are used to set field while generating
+ // machine instructions.
+ //
+ // FIXME: This list will need adjusting/fixing as the MC code emitter
+ // takes shape and the ARMCodeEmitter.cpp bits go away.
+ ShiftTypeShift = 4,
+
+ M_BitShift = 5,
+ ShiftImmShift = 5,
+ ShiftShift = 7,
+ N_BitShift = 7,
+ ImmHiShift = 8,
+ SoRotImmShift = 8,
+ RegRsShift = 8,
+ ExtRotImmShift = 10,
+ RegRdLoShift = 12,
+ RegRdShift = 12,
+ RegRdHiShift = 16,
+ RegRnShift = 16,
+ S_BitShift = 20,
+ W_BitShift = 21,
+ AM3_I_BitShift = 22,
+ D_BitShift = 22,
+ U_BitShift = 23,
+ P_BitShift = 24,
+ I_BitShift = 25,
+ CondShift = 28
+ };
+
} // end namespace ARMII
} // end namespace llvm;
diff --git a/contrib/llvm/lib/Target/ARM/ARMFixupKinds.h b/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMFixupKinds.h
index 350c92d..350c92d 100644
--- a/contrib/llvm/lib/Target/ARM/ARMFixupKinds.h
+++ b/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMFixupKinds.h
diff --git a/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMMCAsmInfo.cpp b/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMMCAsmInfo.cpp
index 53b4c95..1c109e0 100644
--- a/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMMCAsmInfo.cpp
+++ b/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMMCAsmInfo.cpp
@@ -52,6 +52,9 @@ ARMMCAsmInfoDarwin::ARMMCAsmInfoDarwin() {
AsmTransCBE = arm_asm_table;
Data64bitsDirective = 0;
CommentString = "@";
+ Code16Directive = ".code\t16";
+ Code32Directive = ".code\t32";
+
SupportsDebugInformation = true;
// Exceptions handling
@@ -64,12 +67,14 @@ ARMELFMCAsmInfo::ARMELFMCAsmInfo() {
Data64bitsDirective = 0;
CommentString = "@";
-
- HasLEB128 = true;
PrivateGlobalPrefix = ".L";
+ Code16Directive = ".code\t16";
+ Code32Directive = ".code\t32";
+
WeakRefDirective = "\t.weak\t";
- HasLCOMMDirective = true;
+ LCOMMDirectiveType = LCOMM::NoAlignment;
+ HasLEB128 = true;
SupportsDebugInformation = true;
// Exceptions handling
diff --git a/contrib/llvm/lib/Target/ARM/ARMMCCodeEmitter.cpp b/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMMCCodeEmitter.cpp
index 39be3f0..865c3e2 100644
--- a/contrib/llvm/lib/Target/ARM/ARMMCCodeEmitter.cpp
+++ b/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMMCCodeEmitter.cpp
@@ -12,17 +12,18 @@
//===----------------------------------------------------------------------===//
#define DEBUG_TYPE "mccodeemitter"
-#include "ARM.h"
-#include "ARMAddressingModes.h"
-#include "ARMFixupKinds.h"
-#include "ARMInstrInfo.h"
-#include "ARMMCExpr.h"
-#include "ARMSubtarget.h"
+#include "MCTargetDesc/ARMAddressingModes.h"
+#include "MCTargetDesc/ARMBaseInfo.h"
+#include "MCTargetDesc/ARMFixupKinds.h"
+#include "MCTargetDesc/ARMMCExpr.h"
+#include "MCTargetDesc/ARMMCTargetDesc.h"
#include "llvm/MC/MCCodeEmitter.h"
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCInst.h"
#include "llvm/MC/MCInstrInfo.h"
+#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/MC/MCSubtargetInfo.h"
+#include "llvm/ADT/APFloat.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Support/raw_ostream.h"
@@ -112,11 +113,13 @@ public:
/// immediate Thumb2 direct branch target.
uint32_t getUnconditionalBranchTargetOpValue(const MCInst &MI, unsigned OpIdx,
SmallVectorImpl<MCFixup> &Fixups) const;
-
+
/// getARMBranchTargetOpValue - Return encoding info for 24-bit immediate
/// branch target.
uint32_t getARMBranchTargetOpValue(const MCInst &MI, unsigned OpIdx,
SmallVectorImpl<MCFixup> &Fixups) const;
+ uint32_t getARMBLXTargetOpValue(const MCInst &MI, unsigned OpIdx,
+ SmallVectorImpl<MCFixup> &Fixups) const;
/// getAdrLabelOpValue - Return encoding info for 12-bit immediate
/// ADR label target.
@@ -142,6 +145,16 @@ public:
uint32_t getT2AddrModeImm8s4OpValue(const MCInst &MI, unsigned OpIdx,
SmallVectorImpl<MCFixup> &Fixups) const;
+ /// getT2AddrModeImm0_1020s4OpValue - Return encoding info for 'reg + imm8<<2'
+ /// operand.
+ uint32_t getT2AddrModeImm0_1020s4OpValue(const MCInst &MI, unsigned OpIdx,
+ SmallVectorImpl<MCFixup> &Fixups) const;
+
+ /// getT2Imm8s4OpValue - Return encoding info for '+/- imm8<<2'
+ /// operand.
+ uint32_t getT2Imm8s4OpValue(const MCInst &MI, unsigned OpIdx,
+ SmallVectorImpl<MCFixup> &Fixups) const;
+
/// getLdStSORegOpValue - Return encoding info for 'reg +/- reg shop imm'
/// operand as needed by load/store instructions.
@@ -183,6 +196,10 @@ public:
uint32_t getAddrMode2OffsetOpValue(const MCInst &MI, unsigned OpIdx,
SmallVectorImpl<MCFixup> &Fixups) const;
+ /// getPostIdxRegOpValue - Return encoding for postidx_reg operands.
+ uint32_t getPostIdxRegOpValue(const MCInst &MI, unsigned OpIdx,
+ SmallVectorImpl<MCFixup> &Fixups) const;
+
/// getAddrMode3OffsetOpValue - Return encoding for am3offset operands.
uint32_t getAddrMode3OffsetOpValue(const MCInst &MI, unsigned OpIdx,
SmallVectorImpl<MCFixup> &Fixups) const;
@@ -251,27 +268,13 @@ public:
SmallVectorImpl<MCFixup> &Fixups) const;
/// getSORegOpValue - Return an encoded so_reg shifted register value.
- unsigned getSORegOpValue(const MCInst &MI, unsigned Op,
+ unsigned getSORegRegOpValue(const MCInst &MI, unsigned Op,
+ SmallVectorImpl<MCFixup> &Fixups) const;
+ unsigned getSORegImmOpValue(const MCInst &MI, unsigned Op,
SmallVectorImpl<MCFixup> &Fixups) const;
unsigned getT2SORegOpValue(const MCInst &MI, unsigned Op,
SmallVectorImpl<MCFixup> &Fixups) const;
- unsigned getRotImmOpValue(const MCInst &MI, unsigned Op,
- SmallVectorImpl<MCFixup> &Fixups) const {
- switch (MI.getOperand(Op).getImm()) {
- default: assert (0 && "Not a valid rot_imm value!");
- case 0: return 0;
- case 8: return 1;
- case 16: return 2;
- case 24: return 3;
- }
- }
-
- unsigned getImmMinusOneOpValue(const MCInst &MI, unsigned Op,
- SmallVectorImpl<MCFixup> &Fixups) const {
- return MI.getOperand(Op).getImm() - 1;
- }
-
unsigned getNEONVcvtImm32OpValue(const MCInst &MI, unsigned Op,
SmallVectorImpl<MCFixup> &Fixups) const {
return 64 - MI.getOperand(Op).getImm();
@@ -280,12 +283,6 @@ public:
unsigned getBitfieldInvertedMaskOpValue(const MCInst &MI, unsigned Op,
SmallVectorImpl<MCFixup> &Fixups) const;
- unsigned getMsbOpValue(const MCInst &MI, unsigned Op,
- SmallVectorImpl<MCFixup> &Fixups) const;
-
- unsigned getSsatBitPosValue(const MCInst &MI, unsigned Op,
- SmallVectorImpl<MCFixup> &Fixups) const;
-
unsigned getRegisterListOpValue(const MCInst &MI, unsigned Op,
SmallVectorImpl<MCFixup> &Fixups) const;
unsigned getAddrMode6AddressOpValue(const MCInst &MI, unsigned Op,
@@ -306,6 +303,9 @@ public:
unsigned getShiftRight64Imm(const MCInst &MI, unsigned Op,
SmallVectorImpl<MCFixup> &Fixups) const;
+ unsigned getThumbSRImmOpValue(const MCInst &MI, unsigned Op,
+ SmallVectorImpl<MCFixup> &Fixups) const;
+
unsigned NEONThumb2DataIPostEncoder(const MCInst &MI,
unsigned EncodedValue) const;
unsigned NEONThumb2LoadStorePostEncoder(const MCInst &MI,
@@ -439,8 +439,10 @@ EncodeAddrModeOpValues(const MCInst &MI, unsigned OpIdx, unsigned &Reg,
bool isAdd = true;
// Special value for #-0
- if (SImm == INT32_MIN)
+ if (SImm == INT32_MIN) {
SImm = 0;
+ isAdd = false;
+ }
// Immediate is always encoded as positive. The 'U' bit controls add vs sub.
if (SImm < 0) {
@@ -470,11 +472,34 @@ static uint32_t getBranchTargetOpValue(const MCInst &MI, unsigned OpIdx,
return 0;
}
+// Thumb BL and BLX use a strange offset encoding where bits 22 and 21 are
+// determined by negating them and XOR'ing them with bit 23.
+static int32_t encodeThumbBLOffset(int32_t offset) {
+ offset >>= 1;
+ uint32_t S = (offset & 0x800000) >> 23;
+ uint32_t J1 = (offset & 0x400000) >> 22;
+ uint32_t J2 = (offset & 0x200000) >> 21;
+ J1 = (~J1 & 0x1);
+ J2 = (~J2 & 0x1);
+ J1 ^= S;
+ J2 ^= S;
+
+ offset &= ~0x600000;
+ offset |= J1 << 22;
+ offset |= J2 << 21;
+
+ return offset;
+}
+
/// getThumbBLTargetOpValue - Return encoding info for immediate branch target.
uint32_t ARMMCCodeEmitter::
getThumbBLTargetOpValue(const MCInst &MI, unsigned OpIdx,
SmallVectorImpl<MCFixup> &Fixups) const {
- return ::getBranchTargetOpValue(MI, OpIdx, ARM::fixup_arm_thumb_bl, Fixups);
+ const MCOperand MO = MI.getOperand(OpIdx);
+ if (MO.isExpr())
+ return ::getBranchTargetOpValue(MI, OpIdx, ARM::fixup_arm_thumb_bl,
+ Fixups);
+ return encodeThumbBLOffset(MO.getImm());
}
/// getThumbBLXTargetOpValue - Return encoding info for Thumb immediate
@@ -482,28 +507,43 @@ getThumbBLTargetOpValue(const MCInst &MI, unsigned OpIdx,
uint32_t ARMMCCodeEmitter::
getThumbBLXTargetOpValue(const MCInst &MI, unsigned OpIdx,
SmallVectorImpl<MCFixup> &Fixups) const {
- return ::getBranchTargetOpValue(MI, OpIdx, ARM::fixup_arm_thumb_blx, Fixups);
+ const MCOperand MO = MI.getOperand(OpIdx);
+ if (MO.isExpr())
+ return ::getBranchTargetOpValue(MI, OpIdx, ARM::fixup_arm_thumb_blx,
+ Fixups);
+ return encodeThumbBLOffset(MO.getImm());
}
/// getThumbBRTargetOpValue - Return encoding info for Thumb branch target.
uint32_t ARMMCCodeEmitter::
getThumbBRTargetOpValue(const MCInst &MI, unsigned OpIdx,
SmallVectorImpl<MCFixup> &Fixups) const {
- return ::getBranchTargetOpValue(MI, OpIdx, ARM::fixup_arm_thumb_br, Fixups);
+ const MCOperand MO = MI.getOperand(OpIdx);
+ if (MO.isExpr())
+ return ::getBranchTargetOpValue(MI, OpIdx, ARM::fixup_arm_thumb_br,
+ Fixups);
+ return (MO.getImm() >> 1);
}
/// getThumbBCCTargetOpValue - Return encoding info for Thumb branch target.
uint32_t ARMMCCodeEmitter::
getThumbBCCTargetOpValue(const MCInst &MI, unsigned OpIdx,
SmallVectorImpl<MCFixup> &Fixups) const {
- return ::getBranchTargetOpValue(MI, OpIdx, ARM::fixup_arm_thumb_bcc, Fixups);
+ const MCOperand MO = MI.getOperand(OpIdx);
+ if (MO.isExpr())
+ return ::getBranchTargetOpValue(MI, OpIdx, ARM::fixup_arm_thumb_bcc,
+ Fixups);
+ return (MO.getImm() >> 1);
}
/// getThumbCBTargetOpValue - Return encoding info for Thumb branch target.
uint32_t ARMMCCodeEmitter::
getThumbCBTargetOpValue(const MCInst &MI, unsigned OpIdx,
SmallVectorImpl<MCFixup> &Fixups) const {
- return ::getBranchTargetOpValue(MI, OpIdx, ARM::fixup_arm_thumb_cb, Fixups);
+ const MCOperand MO = MI.getOperand(OpIdx);
+ if (MO.isExpr())
+ return ::getBranchTargetOpValue(MI, OpIdx, ARM::fixup_arm_thumb_cb, Fixups);
+ return (MO.getImm() >> 1);
}
/// Return true if this branch has a non-always predication
@@ -513,9 +553,9 @@ static bool HasConditionalBranch(const MCInst &MI) {
for (int i = 0; i < NumOp-1; ++i) {
const MCOperand &MCOp1 = MI.getOperand(i);
const MCOperand &MCOp2 = MI.getOperand(i + 1);
- if (MCOp1.isImm() && MCOp2.isReg() &&
+ if (MCOp1.isImm() && MCOp2.isReg() &&
(MCOp2.getReg() == 0 || MCOp2.getReg() == ARM::CPSR)) {
- if (ARMCC::CondCodes(MCOp1.getImm()) != ARMCC::AL)
+ if (ARMCC::CondCodes(MCOp1.getImm()) != ARMCC::AL)
return true;
}
}
@@ -541,15 +581,32 @@ getBranchTargetOpValue(const MCInst &MI, unsigned OpIdx,
uint32_t ARMMCCodeEmitter::
getARMBranchTargetOpValue(const MCInst &MI, unsigned OpIdx,
SmallVectorImpl<MCFixup> &Fixups) const {
- if (HasConditionalBranch(MI))
+ const MCOperand MO = MI.getOperand(OpIdx);
+ if (MO.isExpr()) {
+ if (HasConditionalBranch(MI))
+ return ::getBranchTargetOpValue(MI, OpIdx,
+ ARM::fixup_arm_condbranch, Fixups);
return ::getBranchTargetOpValue(MI, OpIdx,
- ARM::fixup_arm_condbranch, Fixups);
- return ::getBranchTargetOpValue(MI, OpIdx,
- ARM::fixup_arm_uncondbranch, Fixups);
-}
+ ARM::fixup_arm_uncondbranch, Fixups);
+ }
+ return MO.getImm() >> 2;
+}
+uint32_t ARMMCCodeEmitter::
+getARMBLXTargetOpValue(const MCInst &MI, unsigned OpIdx,
+ SmallVectorImpl<MCFixup> &Fixups) const {
+ const MCOperand MO = MI.getOperand(OpIdx);
+ if (MO.isExpr()) {
+ if (HasConditionalBranch(MI))
+ return ::getBranchTargetOpValue(MI, OpIdx,
+ ARM::fixup_arm_condbranch, Fixups);
+ return ::getBranchTargetOpValue(MI, OpIdx,
+ ARM::fixup_arm_uncondbranch, Fixups);
+ }
+ return MO.getImm() >> 1;
+}
/// getUnconditionalBranchTargetOpValue - Return encoding info for 24-bit
/// immediate branch target.
@@ -579,9 +636,18 @@ getUnconditionalBranchTargetOpValue(const MCInst &MI, unsigned OpIdx,
uint32_t ARMMCCodeEmitter::
getAdrLabelOpValue(const MCInst &MI, unsigned OpIdx,
SmallVectorImpl<MCFixup> &Fixups) const {
- assert(MI.getOperand(OpIdx).isExpr() && "Unexpected adr target type!");
- return ::getBranchTargetOpValue(MI, OpIdx, ARM::fixup_arm_adr_pcrel_12,
- Fixups);
+ const MCOperand MO = MI.getOperand(OpIdx);
+ if (MO.isExpr())
+ return ::getBranchTargetOpValue(MI, OpIdx, ARM::fixup_arm_adr_pcrel_12,
+ Fixups);
+ int32_t offset = MO.getImm();
+ uint32_t Val = 0x2000;
+ if (offset < 0) {
+ Val = 0x1000;
+ offset *= -1;
+ }
+ Val |= offset;
+ return Val;
}
/// getAdrLabelOpValue - Return encoding info for 12-bit immediate ADR label
@@ -589,9 +655,16 @@ getAdrLabelOpValue(const MCInst &MI, unsigned OpIdx,
uint32_t ARMMCCodeEmitter::
getT2AdrLabelOpValue(const MCInst &MI, unsigned OpIdx,
SmallVectorImpl<MCFixup> &Fixups) const {
- assert(MI.getOperand(OpIdx).isExpr() && "Unexpected adr target type!");
- return ::getBranchTargetOpValue(MI, OpIdx, ARM::fixup_t2_adr_pcrel_12,
- Fixups);
+ const MCOperand MO = MI.getOperand(OpIdx);
+ if (MO.isExpr())
+ return ::getBranchTargetOpValue(MI, OpIdx, ARM::fixup_t2_adr_pcrel_12,
+ Fixups);
+ int32_t Val = MO.getImm();
+ if (Val < 0) {
+ Val *= -1;
+ Val |= 0x1000;
+ }
+ return Val;
}
/// getAdrLabelOpValue - Return encoding info for 8-bit immediate ADR label
@@ -599,9 +672,11 @@ getT2AdrLabelOpValue(const MCInst &MI, unsigned OpIdx,
uint32_t ARMMCCodeEmitter::
getThumbAdrLabelOpValue(const MCInst &MI, unsigned OpIdx,
SmallVectorImpl<MCFixup> &Fixups) const {
- assert(MI.getOperand(OpIdx).isExpr() && "Unexpected adr target type!");
- return ::getBranchTargetOpValue(MI, OpIdx, ARM::fixup_thumb_adr_pcrel_10,
- Fixups);
+ const MCOperand MO = MI.getOperand(OpIdx);
+ if (MO.isExpr())
+ return ::getBranchTargetOpValue(MI, OpIdx, ARM::fixup_thumb_adr_pcrel_10,
+ Fixups);
+ return MO.getImm();
}
/// getThumbAddrModeRegRegOpValue - Return encoding info for 'reg + reg'
@@ -635,17 +710,26 @@ getAddrModeImm12OpValue(const MCInst &MI, unsigned OpIdx,
Imm12 = 0;
isAdd = false ; // 'U' bit is set as part of the fixup.
- assert(MO.isExpr() && "Unexpected machine operand type!");
- const MCExpr *Expr = MO.getExpr();
-
- MCFixupKind Kind;
- if (isThumb2())
- Kind = MCFixupKind(ARM::fixup_t2_ldst_pcrel_12);
- else
- Kind = MCFixupKind(ARM::fixup_arm_ldst_pcrel_12);
- Fixups.push_back(MCFixup::Create(0, Expr, Kind));
+ if (MO.isExpr()) {
+ const MCExpr *Expr = MO.getExpr();
- ++MCNumCPRelocations;
+ MCFixupKind Kind;
+ if (isThumb2())
+ Kind = MCFixupKind(ARM::fixup_t2_ldst_pcrel_12);
+ else
+ Kind = MCFixupKind(ARM::fixup_arm_ldst_pcrel_12);
+ Fixups.push_back(MCFixup::Create(0, Expr, Kind));
+
+ ++MCNumCPRelocations;
+ } else {
+ Reg = ARM::PC;
+ int32_t Offset = MO.getImm();
+ if (Offset < 0) {
+ Offset *= -1;
+ isAdd = false;
+ }
+ Imm12 = Offset;
+ }
} else
isAdd = EncodeAddrModeOpValues(MI, OpIdx, Reg, Imm12, Fixups);
@@ -657,6 +741,37 @@ getAddrModeImm12OpValue(const MCInst &MI, unsigned OpIdx,
return Binary;
}
+/// getT2Imm8s4OpValue - Return encoding info for
+/// '+/- imm8<<2' operand.
+uint32_t ARMMCCodeEmitter::
+getT2Imm8s4OpValue(const MCInst &MI, unsigned OpIdx,
+ SmallVectorImpl<MCFixup> &Fixups) const {
+ // FIXME: The immediate operand should have already been encoded like this
+ // before ever getting here. The encoder method should just need to combine
+ // the MI operands for the register and the offset into a single
+ // representation for the complex operand in the .td file. This isn't just
+ // style, unfortunately. As-is, we can't represent the distinct encoding
+ // for #-0.
+
+ // {8} = (U)nsigned (add == '1', sub == '0')
+ // {7-0} = imm8
+ int32_t Imm8 = MI.getOperand(OpIdx).getImm();
+ bool isAdd = Imm8 >= 0;
+
+ // Immediate is always encoded as positive. The 'U' bit controls add vs sub.
+ if (Imm8 < 0)
+ Imm8 = -Imm8;
+
+ // Scaled by 4.
+ Imm8 /= 4;
+
+ uint32_t Binary = Imm8 & 0xff;
+ // Immediate is always encoded as positive. The 'U' bit controls add vs sub.
+ if (isAdd)
+ Binary |= (1 << 8);
+ return Binary;
+}
+
/// getT2AddrModeImm8s4OpValue - Return encoding info for
/// 'reg +/- imm8<<2' operand.
uint32_t ARMMCCodeEmitter::
@@ -683,6 +798,12 @@ getT2AddrModeImm8s4OpValue(const MCInst &MI, unsigned OpIdx,
} else
isAdd = EncodeAddrModeOpValues(MI, OpIdx, Reg, Imm8, Fixups);
+ // FIXME: The immediate operand should have already been encoded like this
+ // before ever getting here. The encoder method should just need to combine
+ // the MI operands for the register and the offset into a single
+ // representation for the complex operand in the .td file. This isn't just
+ // style, unfortunately. As-is, we can't represent the distinct encoding
+ // for #-0.
uint32_t Binary = (Imm8 >> 2) & 0xff;
// Immediate is always encoded as positive. The 'U' bit controls add vs sub.
if (isAdd)
@@ -691,6 +812,20 @@ getT2AddrModeImm8s4OpValue(const MCInst &MI, unsigned OpIdx,
return Binary;
}
+/// getT2AddrModeImm0_1020s4OpValue - Return encoding info for
+/// 'reg + imm8<<2' operand.
+uint32_t ARMMCCodeEmitter::
+getT2AddrModeImm0_1020s4OpValue(const MCInst &MI, unsigned OpIdx,
+ SmallVectorImpl<MCFixup> &Fixups) const {
+ // {11-8} = reg
+ // {7-0} = imm8
+ const MCOperand &MO = MI.getOperand(OpIdx);
+ const MCOperand &MO1 = MI.getOperand(OpIdx + 1);
+ unsigned Reg = getARMRegisterNumbering(MO.getReg());
+ unsigned Imm8 = MO1.getImm();
+ return (Reg << 8) | Imm8;
+}
+
// FIXME: This routine assumes that a binary
// expression will always result in a PCRel expression
// In reality, its only true if one or more subexpressions
@@ -818,6 +953,17 @@ getAddrMode2OffsetOpValue(const MCInst &MI, unsigned OpIdx,
}
uint32_t ARMMCCodeEmitter::
+getPostIdxRegOpValue(const MCInst &MI, unsigned OpIdx,
+ SmallVectorImpl<MCFixup> &Fixups) const {
+ // {4} isAdd
+ // {3-0} Rm
+ const MCOperand &MO = MI.getOperand(OpIdx);
+ const MCOperand &MO1 = MI.getOperand(OpIdx+1);
+ bool isAdd = MO1.getImm() != 0;
+ return getARMRegisterNumbering(MO.getReg()) | (isAdd << 4);
+}
+
+uint32_t ARMMCCodeEmitter::
getAddrMode3OffsetOpValue(const MCInst &MI, unsigned OpIdx,
SmallVectorImpl<MCFixup> &Fixups) const {
// {9} 1 == imm8, 0 == Rm
@@ -891,7 +1037,10 @@ getAddrModeISOpValue(const MCInst &MI, unsigned OpIdx,
uint32_t ARMMCCodeEmitter::
getAddrModePCOpValue(const MCInst &MI, unsigned OpIdx,
SmallVectorImpl<MCFixup> &Fixups) const {
- return ::getBranchTargetOpValue(MI, OpIdx, ARM::fixup_arm_thumb_cp, Fixups);
+ const MCOperand MO = MI.getOperand(OpIdx);
+ if (MO.isExpr())
+ return ::getBranchTargetOpValue(MI, OpIdx, ARM::fixup_arm_thumb_cp, Fixups);
+ return (MO.getImm() >> 2);
}
/// getAddrMode5OpValue - Return encoding info for 'reg +/- imm10' operand.
@@ -934,20 +1083,17 @@ getAddrMode5OpValue(const MCInst &MI, unsigned OpIdx,
}
unsigned ARMMCCodeEmitter::
-getSORegOpValue(const MCInst &MI, unsigned OpIdx,
+getSORegRegOpValue(const MCInst &MI, unsigned OpIdx,
SmallVectorImpl<MCFixup> &Fixups) const {
// Sub-operands are [reg, reg, imm]. The first register is Rm, the reg to be
- // shifted. The second is either Rs, the amount to shift by, or reg0 in which
- // case the imm contains the amount to shift by.
+ // shifted. The second is Rs, the amount to shift by, and the third specifies
+ // the type of the shift.
//
// {3-0} = Rm.
- // {4} = 1 if reg shift, 0 if imm shift
+ // {4} = 1
// {6-5} = type
- // If reg shift:
- // {11-8} = Rs
- // {7} = 0
- // else (imm shift)
- // {11-7} = imm
+ // {11-8} = Rs
+ // {7} = 0
const MCOperand &MO = MI.getOperand(OpIdx);
const MCOperand &MO1 = MI.getOperand(OpIdx + 1);
@@ -966,45 +1112,70 @@ getSORegOpValue(const MCInst &MI, unsigned OpIdx,
// LSR - 0011
// ASR - 0101
// ROR - 0111
- // RRX - 0110 and bit[11:8] clear.
switch (SOpc) {
default: llvm_unreachable("Unknown shift opc!");
case ARM_AM::lsl: SBits = 0x1; break;
case ARM_AM::lsr: SBits = 0x3; break;
case ARM_AM::asr: SBits = 0x5; break;
case ARM_AM::ror: SBits = 0x7; break;
- case ARM_AM::rrx: SBits = 0x6; break;
- }
- } else {
- // Set shift operand (bit[6:4]).
- // LSL - 000
- // LSR - 010
- // ASR - 100
- // ROR - 110
- switch (SOpc) {
- default: llvm_unreachable("Unknown shift opc!");
- case ARM_AM::lsl: SBits = 0x0; break;
- case ARM_AM::lsr: SBits = 0x2; break;
- case ARM_AM::asr: SBits = 0x4; break;
- case ARM_AM::ror: SBits = 0x6; break;
}
}
Binary |= SBits << 4;
- if (SOpc == ARM_AM::rrx)
- return Binary;
- // Encode the shift operation Rs or shift_imm (except rrx).
- if (Rs) {
- // Encode Rs bit[11:8].
- assert(ARM_AM::getSORegOffset(MO2.getImm()) == 0);
- return Binary | (getARMRegisterNumbering(Rs) << ARMII::RegRsShift);
+ // Encode the shift operation Rs.
+ // Encode Rs bit[11:8].
+ assert(ARM_AM::getSORegOffset(MO2.getImm()) == 0);
+ return Binary | (getARMRegisterNumbering(Rs) << ARMII::RegRsShift);
+}
+
+unsigned ARMMCCodeEmitter::
+getSORegImmOpValue(const MCInst &MI, unsigned OpIdx,
+ SmallVectorImpl<MCFixup> &Fixups) const {
+ // Sub-operands are [reg, imm]. The first register is Rm, the reg to be
+ // shifted. The second is the amount to shift by.
+ //
+ // {3-0} = Rm.
+ // {4} = 0
+ // {6-5} = type
+ // {11-7} = imm
+
+ const MCOperand &MO = MI.getOperand(OpIdx);
+ const MCOperand &MO1 = MI.getOperand(OpIdx + 1);
+ ARM_AM::ShiftOpc SOpc = ARM_AM::getSORegShOp(MO1.getImm());
+
+ // Encode Rm.
+ unsigned Binary = getARMRegisterNumbering(MO.getReg());
+
+ // Encode the shift opcode.
+ unsigned SBits = 0;
+
+ // Set shift operand (bit[6:4]).
+ // LSL - 000
+ // LSR - 010
+ // ASR - 100
+ // ROR - 110
+ // RRX - 110 and bit[11:8] clear.
+ switch (SOpc) {
+ default: llvm_unreachable("Unknown shift opc!");
+ case ARM_AM::lsl: SBits = 0x0; break;
+ case ARM_AM::lsr: SBits = 0x2; break;
+ case ARM_AM::asr: SBits = 0x4; break;
+ case ARM_AM::ror: SBits = 0x6; break;
+ case ARM_AM::rrx:
+ Binary |= 0x60;
+ return Binary;
}
// Encode shift_imm bit[11:7].
- return Binary | ARM_AM::getSORegOffset(MO2.getImm()) << 7;
+ Binary |= SBits << 4;
+ unsigned Offset = ARM_AM::getSORegOffset(MO1.getImm());
+ assert(Offset && "Offset must be in range 1-32!");
+ if (Offset == 32) Offset = 0;
+ return Binary | (Offset << 7);
}
+
unsigned ARMMCCodeEmitter::
getT2AddrModeSORegOpValue(const MCInst &MI, unsigned OpNum,
SmallVectorImpl<MCFixup> &Fixups) const {
@@ -1106,6 +1277,7 @@ getT2SORegOpValue(const MCInst &MI, unsigned OpIdx,
case ARM_AM::lsl: SBits = 0x0; break;
case ARM_AM::lsr: SBits = 0x2; break;
case ARM_AM::asr: SBits = 0x4; break;
+ case ARM_AM::rrx: // FALLTHROUGH
case ARM_AM::ror: SBits = 0x6; break;
}
@@ -1131,24 +1303,6 @@ getBitfieldInvertedMaskOpValue(const MCInst &MI, unsigned Op,
}
unsigned ARMMCCodeEmitter::
-getMsbOpValue(const MCInst &MI, unsigned Op,
- SmallVectorImpl<MCFixup> &Fixups) const {
- // MSB - 5 bits.
- uint32_t lsb = MI.getOperand(Op-1).getImm();
- uint32_t width = MI.getOperand(Op).getImm();
- uint32_t msb = lsb+width-1;
- assert (width != 0 && msb < 32 && "Illegal bit width!");
- return msb;
-}
-
-unsigned ARMMCCodeEmitter::
-getSsatBitPosValue(const MCInst &MI, unsigned Op,
- SmallVectorImpl<MCFixup> &Fixups) const {
- // For ssat instructions, the bit position should be encoded decremented by 1
- return MI.getOperand(Op).getImm()-1;
-}
-
-unsigned ARMMCCodeEmitter::
getRegisterListOpValue(const MCInst &MI, unsigned Op,
SmallVectorImpl<MCFixup> &Fixups) const {
// VLDM/VSTM:
@@ -1158,8 +1312,8 @@ getRegisterListOpValue(const MCInst &MI, unsigned Op,
// LDM/STM:
// {15-0} = Bitfield of GPRs.
unsigned Reg = MI.getOperand(Op).getReg();
- bool SPRRegs = ARM::SPRRegClass.contains(Reg);
- bool DPRRegs = ARM::DPRRegClass.contains(Reg);
+ bool SPRRegs = llvm::ARMMCRegisterClasses[ARM::SPRRegClassID].contains(Reg);
+ bool DPRRegs = llvm::ARMMCRegisterClasses[ARM::DPRRegClassID].contains(Reg);
unsigned Binary = 0;
@@ -1299,7 +1453,7 @@ EncodeInstruction(const MCInst &MI, raw_ostream &OS,
Size = Desc.getSize();
else
llvm_unreachable("Unexpected instruction size!");
-
+
uint32_t Binary = getBinaryCodeForInstr(MI, Fixups);
// Thumb 32-bit wide instructions need to emit the high order halfword
// first.
diff --git a/contrib/llvm/lib/Target/ARM/ARMMCExpr.cpp b/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMMCExpr.cpp
index 2727ba8..2727ba8 100644
--- a/contrib/llvm/lib/Target/ARM/ARMMCExpr.cpp
+++ b/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMMCExpr.cpp
diff --git a/contrib/llvm/lib/Target/ARM/ARMMCExpr.h b/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMMCExpr.h
index 0a2e883..0a2e883 100644
--- a/contrib/llvm/lib/Target/ARM/ARMMCExpr.h
+++ b/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMMCExpr.h
diff --git a/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.cpp b/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.cpp
index f8fcf2b..a55c410 100644
--- a/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.cpp
+++ b/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.cpp
@@ -13,10 +13,16 @@
#include "ARMMCTargetDesc.h"
#include "ARMMCAsmInfo.h"
+#include "ARMBaseInfo.h"
+#include "InstPrinter/ARMInstPrinter.h"
+#include "llvm/MC/MCCodeGenInfo.h"
+#include "llvm/MC/MCInstrAnalysis.h"
#include "llvm/MC/MCInstrInfo.h"
#include "llvm/MC/MCRegisterInfo.h"
+#include "llvm/MC/MCStreamer.h"
#include "llvm/MC/MCSubtargetInfo.h"
-#include "llvm/Target/TargetRegistry.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/TargetRegistry.h"
#define GET_REGINFO_MC_DESC
#include "ARMGenRegisterInfo.inc"
@@ -35,7 +41,7 @@ std::string ARM_MC::ParseARMTriple(StringRef TT) {
unsigned Len = TT.size();
unsigned Idx = 0;
- // FIXME: Enahnce Triple helper class to extract ARM version.
+ // FIXME: Enhance Triple helper class to extract ARM version.
bool isThumb = false;
if (Len >= 5 && TT.substr(0, 4) == "armv")
Idx = 4;
@@ -50,18 +56,21 @@ std::string ARM_MC::ParseARMTriple(StringRef TT) {
unsigned SubVer = TT[Idx];
if (SubVer >= '7' && SubVer <= '9') {
if (Len >= Idx+2 && TT[Idx+1] == 'm') {
- // v7m: FeatureNoARM, FeatureDB, FeatureHWDiv
- ARMArchFeature = "+v7,+noarm,+db,+hwdiv";
+ // v7m: FeatureNoARM, FeatureDB, FeatureHWDiv, FeatureMClass
+ ARMArchFeature = "+v7,+noarm,+db,+hwdiv,+mclass";
} else if (Len >= Idx+3 && TT[Idx+1] == 'e'&& TT[Idx+2] == 'm') {
// v7em: FeatureNoARM, FeatureDB, FeatureHWDiv, FeatureDSPThumb2,
- // FeatureT2XtPk
- ARMArchFeature = "+v7,+noarm,+db,+hwdiv,+t2dsp,t2xtpk";
+ // FeatureT2XtPk, FeatureMClass
+ ARMArchFeature = "+v7,+noarm,+db,+hwdiv,+t2dsp,t2xtpk,+mclass";
} else
- // v7a: FeatureNEON, FeatureDB, FeatureDSPThumb2
- ARMArchFeature = "+v7,+neon,+db,+t2dsp";
+ // v7a: FeatureNEON, FeatureDB, FeatureDSPThumb2, FeatureT2XtPk
+ ARMArchFeature = "+v7,+neon,+db,+t2dsp,+t2xtpk";
} else if (SubVer == '6') {
if (Len >= Idx+3 && TT[Idx+1] == 't' && TT[Idx+2] == '2')
ARMArchFeature = "+v6t2";
+ else if (Len >= Idx+2 && TT[Idx+1] == 'm')
+ // v6m: FeatureNoARM, FeatureMClass
+ ARMArchFeature = "+v6t2,+noarm,+mclass";
else
ARMArchFeature = "+v6";
} else if (SubVer == '5') {
@@ -80,6 +89,14 @@ std::string ARM_MC::ParseARMTriple(StringRef TT) {
ARMArchFeature += ",+thumb-mode";
}
+ Triple TheTriple(TT);
+ if (TheTriple.getOS() == Triple::NativeClient) {
+ if (ARMArchFeature.empty())
+ ARMArchFeature = "+nacl-mode";
+ else
+ ARMArchFeature += ",+nacl-mode";
+ }
+
return ARMArchFeature;
}
@@ -98,36 +115,18 @@ MCSubtargetInfo *ARM_MC::createARMMCSubtargetInfo(StringRef TT, StringRef CPU,
return X;
}
-// Force static initialization.
-extern "C" void LLVMInitializeARMMCSubtargetInfo() {
- TargetRegistry::RegisterMCSubtargetInfo(TheARMTarget,
- ARM_MC::createARMMCSubtargetInfo);
- TargetRegistry::RegisterMCSubtargetInfo(TheThumbTarget,
- ARM_MC::createARMMCSubtargetInfo);
-}
-
static MCInstrInfo *createARMMCInstrInfo() {
MCInstrInfo *X = new MCInstrInfo();
InitARMMCInstrInfo(X);
return X;
}
-extern "C" void LLVMInitializeARMMCInstrInfo() {
- TargetRegistry::RegisterMCInstrInfo(TheARMTarget, createARMMCInstrInfo);
- TargetRegistry::RegisterMCInstrInfo(TheThumbTarget, createARMMCInstrInfo);
-}
-
-static MCRegisterInfo *createARMMCRegisterInfo() {
+static MCRegisterInfo *createARMMCRegisterInfo(StringRef Triple) {
MCRegisterInfo *X = new MCRegisterInfo();
- InitARMMCRegisterInfo(X);
+ InitARMMCRegisterInfo(X, ARM::LR);
return X;
}
-extern "C" void LLVMInitializeARMMCRegInfo() {
- TargetRegistry::RegisterMCRegInfo(TheARMTarget, createARMMCRegisterInfo);
- TargetRegistry::RegisterMCRegInfo(TheThumbTarget, createARMMCRegisterInfo);
-}
-
static MCAsmInfo *createARMMCAsmInfo(const Target &T, StringRef TT) {
Triple TheTriple(TT);
@@ -137,8 +136,128 @@ static MCAsmInfo *createARMMCAsmInfo(const Target &T, StringRef TT) {
return new ARMELFMCAsmInfo();
}
-extern "C" void LLVMInitializeARMMCAsmInfo() {
- // Register the target asm info.
+static MCCodeGenInfo *createARMMCCodeGenInfo(StringRef TT, Reloc::Model RM,
+ CodeModel::Model CM) {
+ MCCodeGenInfo *X = new MCCodeGenInfo();
+ if (RM == Reloc::Default) {
+ Triple TheTriple(TT);
+ // Default relocation model on Darwin is PIC, not DynamicNoPIC.
+ RM = TheTriple.isOSDarwin() ? Reloc::PIC_ : Reloc::DynamicNoPIC;
+ }
+ X->InitMCCodeGenInfo(RM, CM);
+ return X;
+}
+
+// This is duplicated code. Refactor this.
+static MCStreamer *createMCStreamer(const Target &T, StringRef TT,
+ MCContext &Ctx, MCAsmBackend &MAB,
+ raw_ostream &OS,
+ MCCodeEmitter *Emitter,
+ bool RelaxAll,
+ bool NoExecStack) {
+ Triple TheTriple(TT);
+
+ if (TheTriple.isOSDarwin())
+ return createMachOStreamer(Ctx, MAB, OS, Emitter, RelaxAll);
+
+ if (TheTriple.isOSWindows()) {
+ llvm_unreachable("ARM does not support Windows COFF format");
+ return NULL;
+ }
+
+ return createELFStreamer(Ctx, MAB, OS, Emitter, RelaxAll, NoExecStack);
+}
+
+static MCInstPrinter *createARMMCInstPrinter(const Target &T,
+ unsigned SyntaxVariant,
+ const MCAsmInfo &MAI,
+ const MCSubtargetInfo &STI) {
+ if (SyntaxVariant == 0)
+ return new ARMInstPrinter(MAI, STI);
+ return 0;
+}
+
+namespace {
+
+class ARMMCInstrAnalysis : public MCInstrAnalysis {
+public:
+ ARMMCInstrAnalysis(const MCInstrInfo *Info) : MCInstrAnalysis(Info) {}
+
+ virtual bool isUnconditionalBranch(const MCInst &Inst) const {
+ // BCCs with the "always" predicate are unconditional branches.
+ if (Inst.getOpcode() == ARM::Bcc && Inst.getOperand(1).getImm()==ARMCC::AL)
+ return true;
+ return MCInstrAnalysis::isUnconditionalBranch(Inst);
+ }
+
+ virtual bool isConditionalBranch(const MCInst &Inst) const {
+ // BCCs with the "always" predicate are unconditional branches.
+ if (Inst.getOpcode() == ARM::Bcc && Inst.getOperand(1).getImm()==ARMCC::AL)
+ return false;
+ return MCInstrAnalysis::isConditionalBranch(Inst);
+ }
+
+ uint64_t evaluateBranch(const MCInst &Inst, uint64_t Addr,
+ uint64_t Size) const {
+ // We only handle PCRel branches for now.
+ if (Info->get(Inst.getOpcode()).OpInfo[0].OperandType!=MCOI::OPERAND_PCREL)
+ return -1ULL;
+
+ int64_t Imm = Inst.getOperand(0).getImm();
+ // FIXME: This is not right for thumb.
+ return Addr+Imm+8; // In ARM mode the PC is always off by 8 bytes.
+ }
+};
+
+}
+
+static MCInstrAnalysis *createARMMCInstrAnalysis(const MCInstrInfo *Info) {
+ return new ARMMCInstrAnalysis(Info);
+}
+
+// Force static initialization.
+extern "C" void LLVMInitializeARMTargetMC() {
+ // Register the MC asm info.
RegisterMCAsmInfoFn A(TheARMTarget, createARMMCAsmInfo);
RegisterMCAsmInfoFn B(TheThumbTarget, createARMMCAsmInfo);
+
+ // Register the MC codegen info.
+ TargetRegistry::RegisterMCCodeGenInfo(TheARMTarget, createARMMCCodeGenInfo);
+ TargetRegistry::RegisterMCCodeGenInfo(TheThumbTarget, createARMMCCodeGenInfo);
+
+ // Register the MC instruction info.
+ TargetRegistry::RegisterMCInstrInfo(TheARMTarget, createARMMCInstrInfo);
+ TargetRegistry::RegisterMCInstrInfo(TheThumbTarget, createARMMCInstrInfo);
+
+ // Register the MC register info.
+ TargetRegistry::RegisterMCRegInfo(TheARMTarget, createARMMCRegisterInfo);
+ TargetRegistry::RegisterMCRegInfo(TheThumbTarget, createARMMCRegisterInfo);
+
+ // Register the MC subtarget info.
+ TargetRegistry::RegisterMCSubtargetInfo(TheARMTarget,
+ ARM_MC::createARMMCSubtargetInfo);
+ TargetRegistry::RegisterMCSubtargetInfo(TheThumbTarget,
+ ARM_MC::createARMMCSubtargetInfo);
+
+ // Register the MC instruction analyzer.
+ TargetRegistry::RegisterMCInstrAnalysis(TheARMTarget,
+ createARMMCInstrAnalysis);
+ TargetRegistry::RegisterMCInstrAnalysis(TheThumbTarget,
+ createARMMCInstrAnalysis);
+
+ // Register the MC Code Emitter
+ TargetRegistry::RegisterMCCodeEmitter(TheARMTarget, createARMMCCodeEmitter);
+ TargetRegistry::RegisterMCCodeEmitter(TheThumbTarget, createARMMCCodeEmitter);
+
+ // Register the asm backend.
+ TargetRegistry::RegisterMCAsmBackend(TheARMTarget, createARMAsmBackend);
+ TargetRegistry::RegisterMCAsmBackend(TheThumbTarget, createARMAsmBackend);
+
+ // Register the object streamer.
+ TargetRegistry::RegisterMCObjectStreamer(TheARMTarget, createMCStreamer);
+ TargetRegistry::RegisterMCObjectStreamer(TheThumbTarget, createMCStreamer);
+
+ // Register the MCInstPrinter.
+ TargetRegistry::RegisterMCInstPrinter(TheARMTarget, createARMMCInstPrinter);
+ TargetRegistry::RegisterMCInstPrinter(TheThumbTarget, createARMMCInstPrinter);
}
diff --git a/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.h b/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.h
index 74701e3..9b3d3bd 100644
--- a/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.h
+++ b/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.h
@@ -14,12 +14,19 @@
#ifndef ARMMCTARGETDESC_H
#define ARMMCTARGETDESC_H
+#include "llvm/Support/DataTypes.h"
#include <string>
namespace llvm {
+class MCAsmBackend;
+class MCCodeEmitter;
+class MCContext;
+class MCInstrInfo;
+class MCObjectWriter;
class MCSubtargetInfo;
-class Target;
class StringRef;
+class Target;
+class raw_ostream;
extern Target TheARMTarget, TheThumbTarget;
@@ -33,6 +40,18 @@ namespace ARM_MC {
StringRef FS);
}
+MCCodeEmitter *createARMMCCodeEmitter(const MCInstrInfo &MCII,
+ const MCSubtargetInfo &STI,
+ MCContext &Ctx);
+
+MCAsmBackend *createARMAsmBackend(const Target &T, StringRef TT);
+
+/// createARMMachObjectWriter - Construct an ARM Mach-O object writer.
+MCObjectWriter *createARMMachObjectWriter(raw_ostream &OS,
+ bool Is64Bit,
+ uint32_t CPUType,
+ uint32_t CPUSubtype);
+
} // End llvm namespace
// Defines symbolic names for ARM registers. This defines a mapping from
diff --git a/contrib/llvm/lib/Target/ARM/ARMMachObjectWriter.cpp b/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMMachObjectWriter.cpp
index a36e47d..352c73e 100644
--- a/contrib/llvm/lib/Target/ARM/ARMMachObjectWriter.cpp
+++ b/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMMachObjectWriter.cpp
@@ -7,8 +7,8 @@
//
//===----------------------------------------------------------------------===//
-#include "ARM.h"
-#include "ARMFixupKinds.h"
+#include "MCTargetDesc/ARMBaseInfo.h"
+#include "MCTargetDesc/ARMFixupKinds.h"
#include "llvm/ADT/Twine.h"
#include "llvm/MC/MCAssembler.h"
#include "llvm/MC/MCAsmLayout.h"
@@ -19,7 +19,6 @@
#include "llvm/MC/MCValue.h"
#include "llvm/Object/MachOFormat.h"
#include "llvm/Support/ErrorHandling.h"
-#include "llvm/Target/TargetAsmBackend.h"
using namespace llvm;
using namespace llvm::object;
diff --git a/contrib/llvm/lib/Target/ARM/MCTargetDesc/CMakeLists.txt b/contrib/llvm/lib/Target/ARM/MCTargetDesc/CMakeLists.txt
deleted file mode 100644
index 68daf42..0000000
--- a/contrib/llvm/lib/Target/ARM/MCTargetDesc/CMakeLists.txt
+++ /dev/null
@@ -1,7 +0,0 @@
-add_llvm_library(LLVMARMDesc
- ARMMCTargetDesc.cpp
- ARMMCAsmInfo.cpp
- )
-
-# Hack: we need to include 'main' target directory to grab private headers
-include_directories(${CMAKE_CURRENT_SOURCE_DIR}/.. ${CMAKE_CURRENT_BINARY_DIR}/..)
diff --git a/contrib/llvm/lib/Target/ARM/MCTargetDesc/Makefile b/contrib/llvm/lib/Target/ARM/MCTargetDesc/Makefile
deleted file mode 100644
index 448ed9d..0000000
--- a/contrib/llvm/lib/Target/ARM/MCTargetDesc/Makefile
+++ /dev/null
@@ -1,16 +0,0 @@
-##===- lib/Target/ARM/TargetDesc/Makefile ------------------*- Makefile -*-===##
-#
-# The LLVM Compiler Infrastructure
-#
-# This file is distributed under the University of Illinois Open Source
-# License. See LICENSE.TXT for details.
-#
-##===----------------------------------------------------------------------===##
-
-LEVEL = ../../../..
-LIBRARYNAME = LLVMARMDesc
-
-# Hack: we need to include 'main' target directory to grab private headers
-CPP.Flags += -I$(PROJ_OBJ_DIR)/.. -I$(PROJ_SRC_DIR)/..
-
-include $(LEVEL)/Makefile.common
diff --git a/contrib/llvm/lib/Target/ARM/NEONMoveFix.cpp b/contrib/llvm/lib/Target/ARM/NEONMoveFix.cpp
deleted file mode 100644
index c85d1e9..0000000
--- a/contrib/llvm/lib/Target/ARM/NEONMoveFix.cpp
+++ /dev/null
@@ -1,149 +0,0 @@
-//===-- NEONMoveFix.cpp - Convert vfp reg-reg moves into neon ---*- C++ -*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-
-#define DEBUG_TYPE "neon-mov-fix"
-#include "ARM.h"
-#include "ARMMachineFunctionInfo.h"
-#include "ARMInstrInfo.h"
-#include "llvm/CodeGen/MachineInstr.h"
-#include "llvm/CodeGen/MachineInstrBuilder.h"
-#include "llvm/CodeGen/MachineFunctionPass.h"
-#include "llvm/ADT/Statistic.h"
-#include "llvm/Support/Debug.h"
-#include "llvm/Support/raw_ostream.h"
-using namespace llvm;
-
-STATISTIC(NumVMovs, "Number of reg-reg moves converted");
-
-namespace {
- struct NEONMoveFixPass : public MachineFunctionPass {
- static char ID;
- NEONMoveFixPass() : MachineFunctionPass(ID) {}
-
- virtual bool runOnMachineFunction(MachineFunction &Fn);
-
- virtual const char *getPassName() const {
- return "NEON reg-reg move conversion";
- }
-
- private:
- const TargetRegisterInfo *TRI;
- const ARMBaseInstrInfo *TII;
- bool isA8;
-
- typedef DenseMap<unsigned, const MachineInstr*> RegMap;
-
- bool InsertMoves(MachineBasicBlock &MBB);
- };
- char NEONMoveFixPass::ID = 0;
-}
-
-static bool inNEONDomain(unsigned Domain, bool isA8) {
- return (Domain & ARMII::DomainNEON) ||
- (isA8 && (Domain & ARMII::DomainNEONA8));
-}
-
-bool NEONMoveFixPass::InsertMoves(MachineBasicBlock &MBB) {
- RegMap Defs;
- bool Modified = false;
-
- // Walk over MBB tracking the def points of the registers.
- MachineBasicBlock::iterator MII = MBB.begin(), E = MBB.end();
- MachineBasicBlock::iterator NextMII;
- for (; MII != E; MII = NextMII) {
- NextMII = llvm::next(MII);
- MachineInstr *MI = &*MII;
-
- if (MI->getOpcode() == ARM::VMOVD &&
- !TII->isPredicated(MI)) {
- unsigned SrcReg = MI->getOperand(1).getReg();
- // If we do not find an instruction defining the reg, this means the
- // register should be live-in for this BB. It's always to better to use
- // NEON reg-reg moves.
- unsigned Domain = ARMII::DomainNEON;
- RegMap::iterator DefMI = Defs.find(SrcReg);
- if (DefMI != Defs.end()) {
- Domain = DefMI->second->getDesc().TSFlags & ARMII::DomainMask;
- // Instructions in general domain are subreg accesses.
- // Map them to NEON reg-reg moves.
- if (Domain == ARMII::DomainGeneral)
- Domain = ARMII::DomainNEON;
- }
-
- if (inNEONDomain(Domain, isA8)) {
- // Convert VMOVD to VORRd
- unsigned DestReg = MI->getOperand(0).getReg();
-
- DEBUG({errs() << "vmov convert: "; MI->dump();});
-
- // It's safe to ignore imp-defs / imp-uses here, since:
- // - We're running late, no intelligent condegen passes should be run
- // afterwards
- // - The imp-defs / imp-uses are superregs only, we don't care about
- // them.
- AddDefaultPred(BuildMI(MBB, *MI, MI->getDebugLoc(),
- TII->get(ARM::VORRd), DestReg)
- .addReg(SrcReg).addReg(SrcReg));
- MBB.erase(MI);
- MachineBasicBlock::iterator I = prior(NextMII);
- MI = &*I;
-
- DEBUG({errs() << " into: "; MI->dump();});
-
- Modified = true;
- ++NumVMovs;
- } else {
- assert((Domain & ARMII::DomainVFP) && "Invalid domain!");
- // Do nothing.
- }
- }
-
- // Update def information.
- for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
- const MachineOperand& MO = MI->getOperand(i);
- if (!MO.isReg() || !MO.isDef())
- continue;
- unsigned MOReg = MO.getReg();
-
- Defs[MOReg] = MI;
- // Catch aliases as well.
- for (const unsigned *R = TRI->getAliasSet(MOReg); *R; ++R)
- Defs[*R] = MI;
- }
- }
-
- return Modified;
-}
-
-bool NEONMoveFixPass::runOnMachineFunction(MachineFunction &Fn) {
- ARMFunctionInfo *AFI = Fn.getInfo<ARMFunctionInfo>();
- const TargetMachine &TM = Fn.getTarget();
-
- if (AFI->isThumb1OnlyFunction())
- return false;
-
- TRI = TM.getRegisterInfo();
- TII = static_cast<const ARMBaseInstrInfo*>(TM.getInstrInfo());
- isA8 = TM.getSubtarget<ARMSubtarget>().isCortexA8();
-
- bool Modified = false;
- for (MachineFunction::iterator MFI = Fn.begin(), E = Fn.end(); MFI != E;
- ++MFI) {
- MachineBasicBlock &MBB = *MFI;
- Modified |= InsertMoves(MBB);
- }
-
- return Modified;
-}
-
-/// createNEONMoveFixPass - Returns an instance of the NEON reg-reg moves fix
-/// pass.
-FunctionPass *llvm::createNEONMoveFixPass() {
- return new NEONMoveFixPass();
-}
diff --git a/contrib/llvm/lib/Target/ARM/TargetInfo/ARMTargetInfo.cpp b/contrib/llvm/lib/Target/ARM/TargetInfo/ARMTargetInfo.cpp
index 163a0a9..500e3de 100644
--- a/contrib/llvm/lib/Target/ARM/TargetInfo/ARMTargetInfo.cpp
+++ b/contrib/llvm/lib/Target/ARM/TargetInfo/ARMTargetInfo.cpp
@@ -9,7 +9,7 @@
#include "ARM.h"
#include "llvm/Module.h"
-#include "llvm/Target/TargetRegistry.h"
+#include "llvm/Support/TargetRegistry.h"
using namespace llvm;
Target llvm::TheARMTarget, llvm::TheThumbTarget;
diff --git a/contrib/llvm/lib/Target/ARM/Thumb1FrameLowering.cpp b/contrib/llvm/lib/Target/ARM/Thumb1FrameLowering.cpp
index c258870..d848177 100644
--- a/contrib/llvm/lib/Target/ARM/Thumb1FrameLowering.cpp
+++ b/contrib/llvm/lib/Target/ARM/Thumb1FrameLowering.cpp
@@ -21,7 +21,7 @@
using namespace llvm;
-bool Thumb1FrameLowering::hasReservedCallFrame(const MachineFunction &MF) const {
+bool Thumb1FrameLowering::hasReservedCallFrame(const MachineFunction &MF) const{
const MachineFrameInfo *FFI = MF.getFrameInfo();
unsigned CFSize = FFI->getMaxCallFrameSize();
// It's not always a good idea to include the call frame as part of the
@@ -133,9 +133,9 @@ void Thumb1FrameLowering::emitPrologue(MachineFunction &MF) const {
// Adjust FP so it point to the stack slot that contains the previous FP.
if (hasFP(MF)) {
- BuildMI(MBB, MBBI, dl, TII.get(ARM::tADDrSPi), FramePtr)
+ AddDefaultPred(BuildMI(MBB, MBBI, dl, TII.get(ARM::tADDrSPi), FramePtr)
.addFrameIndex(FramePtrSpillFI).addImm(0)
- .setMIFlags(MachineInstr::FrameSetup);
+ .setMIFlags(MachineInstr::FrameSetup));
if (NumBytes > 508)
// If offset is > 508 then sp cannot be adjusted in a single instruction,
// try restoring from fp instead.
@@ -155,6 +155,11 @@ void Thumb1FrameLowering::emitPrologue(MachineFunction &MF) const {
AFI->setGPRCalleeSavedArea2Size(GPRCS2Size);
AFI->setDPRCalleeSavedAreaSize(DPRCSSize);
+ // Thumb1 does not currently support dynamic stack realignment. Report a
+ // fatal error rather then silently generate bad code.
+ if (RegInfo->needsStackRealignment(MF))
+ report_fatal_error("Dynamic stack realignment not supported for thumb1.");
+
// If we need a base pointer, set it up here. It's whatever the value
// of the stack pointer is at this point. Any variable size objects
// will be allocated after this, so we can still use the base pointer
diff --git a/contrib/llvm/lib/Target/ARM/Thumb1RegisterInfo.cpp b/contrib/llvm/lib/Target/ARM/Thumb1RegisterInfo.cpp
index 4eb0b6c..e8ed482 100644
--- a/contrib/llvm/lib/Target/ARM/Thumb1RegisterInfo.cpp
+++ b/contrib/llvm/lib/Target/ARM/Thumb1RegisterInfo.cpp
@@ -13,12 +13,12 @@
//===----------------------------------------------------------------------===//
#include "ARM.h"
-#include "ARMAddressingModes.h"
#include "ARMBaseInstrInfo.h"
#include "ARMMachineFunctionInfo.h"
#include "ARMSubtarget.h"
#include "Thumb1InstrInfo.h"
#include "Thumb1RegisterInfo.h"
+#include "MCTargetDesc/ARMAddressingModes.h"
#include "llvm/Constants.h"
#include "llvm/DerivedTypes.h"
#include "llvm/Function.h"
@@ -27,7 +27,6 @@
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
-#include "llvm/CodeGen/MachineLocation.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/Target/TargetFrameLowering.h"
#include "llvm/Target/TargetMachine.h"
@@ -182,7 +181,6 @@ void llvm::emitThumbRegPlusImmediate(MachineBasicBlock &MBB,
int Opc = 0;
int ExtraOpc = 0;
bool NeedCC = false;
- bool NeedPred = false;
if (DestReg == BaseReg && BaseReg == ARM::SP) {
assert(isMul4 && "Thumb sp inc / dec size must be multiple of 4!");
@@ -217,7 +215,7 @@ void llvm::emitThumbRegPlusImmediate(MachineBasicBlock &MBB,
} else {
Opc = isSub ? ARM::tSUBi8 : ARM::tADDi8;
NumBits = 8;
- NeedPred = NeedCC = true;
+ NeedCC = true;
}
isTwoAddr = true;
}
@@ -241,7 +239,8 @@ void llvm::emitThumbRegPlusImmediate(MachineBasicBlock &MBB,
Bytes -= ThisVal;
const MCInstrDesc &MCID = TII.get(isSub ? ARM::tSUBi3 : ARM::tADDi3);
const MachineInstrBuilder MIB =
- AddDefaultT1CC(BuildMI(MBB, MBBI, dl, MCID, DestReg).setMIFlags(MIFlags));
+ AddDefaultT1CC(BuildMI(MBB, MBBI, dl, MCID, DestReg)
+ .setMIFlags(MIFlags));
AddDefaultPred(MIB.addReg(BaseReg, RegState::Kill).addImm(ThisVal));
} else {
AddDefaultPred(BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVr), DestReg)
@@ -262,18 +261,15 @@ void llvm::emitThumbRegPlusImmediate(MachineBasicBlock &MBB,
if (NeedCC)
MIB = AddDefaultT1CC(MIB);
MIB.addReg(DestReg).addImm(ThisVal);
- if (NeedPred)
- MIB = AddDefaultPred(MIB);
+ MIB = AddDefaultPred(MIB);
MIB.setMIFlags(MIFlags);
- }
- else {
+ } else {
bool isKill = BaseReg != ARM::SP;
MachineInstrBuilder MIB = BuildMI(MBB, MBBI, dl, TII.get(Opc), DestReg);
if (NeedCC)
MIB = AddDefaultT1CC(MIB);
MIB.addReg(BaseReg, getKillRegState(isKill)).addImm(ThisVal);
- if (NeedPred)
- MIB = AddDefaultPred(MIB);
+ MIB = AddDefaultPred(MIB);
MIB.setMIFlags(MIFlags);
BaseReg = DestReg;
@@ -285,7 +281,7 @@ void llvm::emitThumbRegPlusImmediate(MachineBasicBlock &MBB,
Scale = 1;
Chunk = ((1 << NumBits) - 1) * Scale;
Opc = isSub ? ARM::tSUBi8 : ARM::tADDi8;
- NeedPred = NeedCC = isTwoAddr = true;
+ NeedCC = isTwoAddr = true;
}
}
}
@@ -405,7 +401,6 @@ rewriteFrameIndex(MachineBasicBlock::iterator II, unsigned FrameRegIdx,
unsigned Scale = 1;
if (FrameReg != ARM::SP) {
Opcode = ARM::tADDi3;
- MI.setDesc(TII.get(Opcode));
NumBits = 3;
} else {
NumBits = 8;
@@ -419,10 +414,9 @@ rewriteFrameIndex(MachineBasicBlock::iterator II, unsigned FrameRegIdx,
// Turn it into a move.
MI.setDesc(TII.get(ARM::tMOVr));
MI.getOperand(FrameRegIdx).ChangeToRegister(FrameReg, false);
- // Remove offset and add predicate operands.
+ // Remove offset
MI.RemoveOperand(FrameRegIdx+1);
MachineInstrBuilder MIB(&MI);
- AddDefaultPred(MIB);
return true;
}
@@ -431,6 +425,7 @@ rewriteFrameIndex(MachineBasicBlock::iterator II, unsigned FrameRegIdx,
if (((Offset / Scale) & ~Mask) == 0) {
// Replace the FrameIndex with sp / fp
if (Opcode == ARM::tADDi3) {
+ MI.setDesc(TII.get(Opcode));
removeOperands(MI, FrameRegIdx);
MachineInstrBuilder MIB(&MI);
AddDefaultPred(AddDefaultT1CC(MIB).addReg(FrameReg)
@@ -459,6 +454,7 @@ rewriteFrameIndex(MachineBasicBlock::iterator II, unsigned FrameRegIdx,
// r0 = add sp, 255*4
// r0 = add r0, (imm - 255*4)
if (Opcode == ARM::tADDi3) {
+ MI.setDesc(TII.get(Opcode));
removeOperands(MI, FrameRegIdx);
MachineInstrBuilder MIB(&MI);
AddDefaultPred(AddDefaultT1CC(MIB).addReg(FrameReg).addImm(Mask));
@@ -479,10 +475,6 @@ rewriteFrameIndex(MachineBasicBlock::iterator II, unsigned FrameRegIdx,
MI.setDesc(TII.get(ARM::tADDhirr));
MI.getOperand(FrameRegIdx).ChangeToRegister(DestReg, false, false, true);
MI.getOperand(FrameRegIdx+1).ChangeToRegister(FrameReg, false);
- if (Opcode == ARM::tADDi3) {
- MachineInstrBuilder MIB(&MI);
- AddDefaultPred(MIB);
- }
}
return true;
} else {
@@ -545,9 +537,9 @@ Thumb1RegisterInfo::resolveFrameIndex(MachineBasicBlock::iterator I,
++i;
assert(i < MI.getNumOperands() && "Instr doesn't have FrameIndex operand!");
}
- bool Done = false;
- Done = rewriteFrameIndex(MI, i, BaseReg, Off, TII);
+ bool Done = rewriteFrameIndex(MI, i, BaseReg, Off, TII);
assert (Done && "Unable to resolve frame index!");
+ (void)Done;
}
/// saveScavengerRegister - Spill the register so it can be used by the
diff --git a/contrib/llvm/lib/Target/ARM/Thumb2ITBlockPass.cpp b/contrib/llvm/lib/Target/ARM/Thumb2ITBlockPass.cpp
index 360ec00..b627400 100644
--- a/contrib/llvm/lib/Target/ARM/Thumb2ITBlockPass.cpp
+++ b/contrib/llvm/lib/Target/ARM/Thumb2ITBlockPass.cpp
@@ -124,6 +124,27 @@ Thumb2ITBlockPass::MoveCopyOutOfITBlock(MachineInstr *MI,
if (Uses.count(DstReg) || Defs.count(SrcReg))
return false;
+ // If the CPSR is defined by this copy, then we don't want to move it. E.g.,
+ // if we have:
+ //
+ // movs r1, r1
+ // rsb r1, 0
+ // movs r2, r2
+ // rsb r2, 0
+ //
+ // we don't want this to be converted to:
+ //
+ // movs r1, r1
+ // movs r2, r2
+ // itt mi
+ // rsb r1, 0
+ // rsb r2, 0
+ //
+ const MCInstrDesc &MCID = MI->getDesc();
+ if (MCID.hasOptionalDef() &&
+ MI->getOperand(MCID.getNumOperands() - 1).getReg() == ARM::CPSR)
+ return false;
+
// Then peek at the next instruction to see if it's predicated on CC or OCC.
// If not, then there is nothing to be gained by moving the copy.
MachineBasicBlock::iterator I = MI; ++I;
diff --git a/contrib/llvm/lib/Target/ARM/Thumb2InstrInfo.cpp b/contrib/llvm/lib/Target/ARM/Thumb2InstrInfo.cpp
index 51b56aa..cf040c82 100644
--- a/contrib/llvm/lib/Target/ARM/Thumb2InstrInfo.cpp
+++ b/contrib/llvm/lib/Target/ARM/Thumb2InstrInfo.cpp
@@ -14,9 +14,9 @@
#include "Thumb2InstrInfo.h"
#include "ARM.h"
#include "ARMConstantPoolValue.h"
-#include "ARMAddressingModes.h"
#include "ARMMachineFunctionInfo.h"
#include "Thumb2InstrInfo.h"
+#include "MCTargetDesc/ARMAddressingModes.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineMemOperand.h"
@@ -122,7 +122,8 @@ storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
const TargetRegisterClass *RC,
const TargetRegisterInfo *TRI) const {
if (RC == ARM::GPRRegisterClass || RC == ARM::tGPRRegisterClass ||
- RC == ARM::tcGPRRegisterClass || RC == ARM::rGPRRegisterClass) {
+ RC == ARM::tcGPRRegisterClass || RC == ARM::rGPRRegisterClass ||
+ RC == ARM::GPRnopcRegisterClass) {
DebugLoc DL;
if (I != MBB.end()) DL = I->getDebugLoc();
@@ -149,7 +150,8 @@ loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
const TargetRegisterClass *RC,
const TargetRegisterInfo *TRI) const {
if (RC == ARM::GPRRegisterClass || RC == ARM::tGPRRegisterClass ||
- RC == ARM::tcGPRRegisterClass || RC == ARM::rGPRRegisterClass) {
+ RC == ARM::tcGPRRegisterClass || RC == ARM::rGPRRegisterClass ||
+ RC == ARM::GPRnopcRegisterClass) {
DebugLoc DL;
if (I != MBB.end()) DL = I->getDebugLoc();
@@ -233,9 +235,8 @@ void llvm::emitT2RegPlusImmediate(MachineBasicBlock &MBB,
if (DestReg == ARM::SP && (ThisVal < ((1 << 7)-1) * 4)) {
assert((ThisVal & 3) == 0 && "Stack update is not multiple of 4?");
Opc = isSub ? ARM::tSUBspi : ARM::tADDspi;
- // FIXME: Fix Thumb1 immediate encoding.
- BuildMI(MBB, MBBI, dl, TII.get(Opc), DestReg)
- .addReg(BaseReg).addImm(ThisVal/4).setMIFlags(MIFlags);
+ AddDefaultPred(BuildMI(MBB, MBBI, dl, TII.get(Opc), DestReg)
+ .addReg(BaseReg).addImm(ThisVal/4).setMIFlags(MIFlags));
NumBytes = 0;
continue;
}
diff --git a/contrib/llvm/lib/Target/ARM/Thumb2SizeReduction.cpp b/contrib/llvm/lib/Target/ARM/Thumb2SizeReduction.cpp
index c741a6e..89a155c 100644
--- a/contrib/llvm/lib/Target/ARM/Thumb2SizeReduction.cpp
+++ b/contrib/llvm/lib/Target/ARM/Thumb2SizeReduction.cpp
@@ -9,11 +9,11 @@
#define DEBUG_TYPE "t2-reduce-size"
#include "ARM.h"
-#include "ARMAddressingModes.h"
#include "ARMBaseRegisterInfo.h"
#include "ARMBaseInstrInfo.h"
#include "ARMSubtarget.h"
#include "Thumb2InstrInfo.h"
+#include "MCTargetDesc/ARMAddressingModes.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
@@ -97,11 +97,11 @@ namespace {
{ ARM::t2SUBrr, ARM::tSUBrr, 0, 0, 0, 1, 0, 0,0, 0,0 },
{ ARM::t2SUBSri,ARM::tSUBi3, ARM::tSUBi8, 3, 8, 1, 1, 2,2, 0,0 },
{ ARM::t2SUBSrr,ARM::tSUBrr, 0, 0, 0, 1, 0, 2,0, 0,0 },
- { ARM::t2SXTBr, ARM::tSXTB, 0, 0, 0, 1, 0, 1,0, 0,0 },
- { ARM::t2SXTHr, ARM::tSXTH, 0, 0, 0, 1, 0, 1,0, 0,0 },
+ { ARM::t2SXTB, ARM::tSXTB, 0, 0, 0, 1, 0, 1,0, 0,1 },
+ { ARM::t2SXTH, ARM::tSXTH, 0, 0, 0, 1, 0, 1,0, 0,1 },
{ ARM::t2TSTrr, ARM::tTST, 0, 0, 0, 1, 0, 2,0, 0,0 },
- { ARM::t2UXTBr, ARM::tUXTB, 0, 0, 0, 1, 0, 1,0, 0,0 },
- { ARM::t2UXTHr, ARM::tUXTH, 0, 0, 0, 1, 0, 1,0, 0,0 },
+ { ARM::t2UXTB, ARM::tUXTB, 0, 0, 0, 1, 0, 1,0, 0,1 },
+ { ARM::t2UXTH, ARM::tUXTH, 0, 0, 0, 1, 0, 1,0, 0,1 },
// FIXME: Clean this up after splitting each Thumb load / store opcode
// into multiple ones.
@@ -507,6 +507,7 @@ Thumb2SizeReduce::ReduceSpecial(MachineBasicBlock &MBB, MachineInstr *MI,
.addOperand(MI->getOperand(0))
.addOperand(MI->getOperand(1))
.addImm(Imm / 4); // The tADDrSPi has an implied scale by four.
+ AddDefaultPred(MIB);
// Transfer MI flags.
MIB.setMIFlags(MI->getFlags());
@@ -546,6 +547,10 @@ Thumb2SizeReduce::ReduceSpecial(MachineBasicBlock &MBB, MachineInstr *MI,
}
case ARM::t2RSBri:
case ARM::t2RSBSri:
+ case ARM::t2SXTB:
+ case ARM::t2SXTH:
+ case ARM::t2UXTB:
+ case ARM::t2UXTH:
if (MI->getOperand(2).getImm() == 0)
return ReduceToNarrow(MBB, MI, Entry, LiveCPSR, CPSRDef);
break;
@@ -742,7 +747,11 @@ Thumb2SizeReduce::ReduceToNarrow(MachineBasicBlock &MBB, MachineInstr *MI,
if (i < NumOps && MCID.OpInfo[i].isOptionalDef())
continue;
if ((MCID.getOpcode() == ARM::t2RSBSri ||
- MCID.getOpcode() == ARM::t2RSBri) && i == 2)
+ MCID.getOpcode() == ARM::t2RSBri ||
+ MCID.getOpcode() == ARM::t2SXTB ||
+ MCID.getOpcode() == ARM::t2SXTH ||
+ MCID.getOpcode() == ARM::t2UXTB ||
+ MCID.getOpcode() == ARM::t2UXTH) && i == 2)
// Skip the zero immediate operand, it's now implicit.
continue;
bool isPred = (i < NumOps && MCID.OpInfo[i].isPredicate());
diff --git a/contrib/llvm/lib/Target/Alpha/AlphaAsmPrinter.cpp b/contrib/llvm/lib/Target/Alpha/AlphaAsmPrinter.cpp
index 46ae286..5dce06a 100644
--- a/contrib/llvm/lib/Target/Alpha/AlphaAsmPrinter.cpp
+++ b/contrib/llvm/lib/Target/Alpha/AlphaAsmPrinter.cpp
@@ -26,8 +26,8 @@
#include "llvm/Target/Mangler.h"
#include "llvm/Target/TargetLoweringObjectFile.h"
#include "llvm/Target/TargetMachine.h"
-#include "llvm/Target/TargetRegistry.h"
#include "llvm/ADT/SmallString.h"
+#include "llvm/Support/TargetRegistry.h"
#include "llvm/Support/raw_ostream.h"
using namespace llvm;
diff --git a/contrib/llvm/lib/Target/Alpha/AlphaISelDAGToDAG.cpp b/contrib/llvm/lib/Target/Alpha/AlphaISelDAGToDAG.cpp
index 7b91fea..f877c65 100644
--- a/contrib/llvm/lib/Target/Alpha/AlphaISelDAGToDAG.cpp
+++ b/contrib/llvm/lib/Target/Alpha/AlphaISelDAGToDAG.cpp
@@ -80,7 +80,7 @@ namespace {
// Otherwise we don't know that the it's okay to zapnot this entire
// byte. Only do this iff we can prove that the missing bits are
// already null, so the bytezap doesn't need to really null them.
- BitsToCheck |= ~Constant & (0xFF << 8*i);
+ BitsToCheck |= ~Constant & (0xFFULL << 8*i);
}
}
}
@@ -114,9 +114,8 @@ namespace {
if (!x) return 0;
unsigned at = CountLeadingZeros_64(x);
uint64_t complow = 1ULL << (63 - at);
- uint64_t comphigh = 1ULL << (64 - at);
- //cerr << x << ":" << complow << ":" << comphigh << "\n";
- if (abs64(complow - x) <= abs64(comphigh - x))
+ uint64_t comphigh = complow << 1;
+ if (x - complow <= comphigh - x)
return complow;
else
return comphigh;
diff --git a/contrib/llvm/lib/Target/Alpha/AlphaISelLowering.cpp b/contrib/llvm/lib/Target/Alpha/AlphaISelLowering.cpp
index de003fb..3057eb8 100644
--- a/contrib/llvm/lib/Target/Alpha/AlphaISelLowering.cpp
+++ b/contrib/llvm/lib/Target/Alpha/AlphaISelLowering.cpp
@@ -49,6 +49,7 @@ AlphaTargetLowering::AlphaTargetLowering(TargetMachine &TM)
// Set up the TargetLowering object.
//I am having problems with shr n i8 1
setBooleanContents(ZeroOrOneBooleanContent);
+ setBooleanVectorContents(ZeroOrOneBooleanContent); // FIXME: Is this correct?
addRegisterClass(MVT::i64, Alpha::GPRCRegisterClass);
addRegisterClass(MVT::f64, Alpha::F8RCRegisterClass);
@@ -153,6 +154,9 @@ AlphaTargetLowering::AlphaTargetLowering(TargetMachine &TM)
setOperationAction(ISD::JumpTable, MVT::i64, Custom);
setOperationAction(ISD::JumpTable, MVT::i32, Custom);
+ setOperationAction(ISD::ATOMIC_LOAD, MVT::i32, Expand);
+ setOperationAction(ISD::ATOMIC_STORE, MVT::i32, Expand);
+
setStackPointerRegisterToSaveRestore(Alpha::R30);
setJumpBufSize(272);
@@ -160,10 +164,12 @@ AlphaTargetLowering::AlphaTargetLowering(TargetMachine &TM)
setMinFunctionAlignment(4);
+ setInsertFencesForAtomic(true);
+
computeRegisterProperties();
}
-MVT::SimpleValueType AlphaTargetLowering::getSetCCResultType(EVT VT) const {
+EVT AlphaTargetLowering::getSetCCResultType(EVT VT) const {
return MVT::i64;
}
diff --git a/contrib/llvm/lib/Target/Alpha/AlphaISelLowering.h b/contrib/llvm/lib/Target/Alpha/AlphaISelLowering.h
index 13383f4..80f8efa 100644
--- a/contrib/llvm/lib/Target/Alpha/AlphaISelLowering.h
+++ b/contrib/llvm/lib/Target/Alpha/AlphaISelLowering.h
@@ -66,7 +66,7 @@ namespace llvm {
virtual MVT getShiftAmountTy(EVT LHSTy) const { return MVT::i64; }
/// getSetCCResultType - Get the SETCC result ValueType
- virtual MVT::SimpleValueType getSetCCResultType(EVT VT) const;
+ virtual EVT getSetCCResultType(EVT VT) const;
/// LowerOperation - Provide custom lowering hooks for some operations.
///
diff --git a/contrib/llvm/lib/Target/Alpha/AlphaInstrInfo.cpp b/contrib/llvm/lib/Target/Alpha/AlphaInstrInfo.cpp
index 4dcec8f..8df2ed7 100644
--- a/contrib/llvm/lib/Target/Alpha/AlphaInstrInfo.cpp
+++ b/contrib/llvm/lib/Target/Alpha/AlphaInstrInfo.cpp
@@ -16,7 +16,6 @@
#include "AlphaMachineFunctionInfo.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
-#include "llvm/Target/TargetRegistry.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/Support/ErrorHandling.h"
diff --git a/contrib/llvm/lib/Target/Alpha/AlphaInstrInfo.td b/contrib/llvm/lib/Target/Alpha/AlphaInstrInfo.td
index b201712..c8c9377 100644
--- a/contrib/llvm/lib/Target/Alpha/AlphaInstrInfo.td
+++ b/contrib/llvm/lib/Target/Alpha/AlphaInstrInfo.td
@@ -607,6 +607,8 @@ def : Pat<(membarrier (i64 imm), (i64 imm), (i64 imm), (i64 1), (i64 imm)),
def : Pat<(membarrier (i64 imm), (i64 imm), (i64 imm), (i64 imm), (i64 imm)),
(MB)>;
+def : Pat<(atomic_fence (imm), (imm)), (MB)>;
+
//Basic Floating point ops
//Floats
diff --git a/contrib/llvm/lib/Target/Alpha/AlphaRegisterInfo.cpp b/contrib/llvm/lib/Target/Alpha/AlphaRegisterInfo.cpp
index df8f157..8b6230f 100644
--- a/contrib/llvm/lib/Target/Alpha/AlphaRegisterInfo.cpp
+++ b/contrib/llvm/lib/Target/Alpha/AlphaRegisterInfo.cpp
@@ -21,7 +21,6 @@
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
-#include "llvm/CodeGen/MachineLocation.h"
#include "llvm/Target/TargetFrameLowering.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetOptions.h"
@@ -40,8 +39,7 @@
using namespace llvm;
AlphaRegisterInfo::AlphaRegisterInfo(const TargetInstrInfo &tii)
- : AlphaGenRegisterInfo(),
- TII(tii) {
+ : AlphaGenRegisterInfo(Alpha::R26), TII(tii) {
}
static long getUpper16(long l) {
@@ -178,10 +176,6 @@ AlphaRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
}
}
-unsigned AlphaRegisterInfo::getRARegister() const {
- return Alpha::R26;
-}
-
unsigned AlphaRegisterInfo::getFrameRegister(const MachineFunction &MF) const {
const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering();
@@ -198,16 +192,6 @@ unsigned AlphaRegisterInfo::getEHHandlerRegister() const {
return 0;
}
-int AlphaRegisterInfo::getDwarfRegNum(unsigned RegNum, bool isEH) const {
- llvm_unreachable("What is the dwarf register number");
- return -1;
-}
-
-int AlphaRegisterInfo::getLLVMRegNum(unsigned DwarfRegNum, bool isEH) const {
- llvm_unreachable("What is the dwarf register number");
- return -1;
-}
-
std::string AlphaRegisterInfo::getPrettyName(unsigned reg)
{
std::string s(AlphaRegDesc[reg].Name);
diff --git a/contrib/llvm/lib/Target/Alpha/AlphaRegisterInfo.h b/contrib/llvm/lib/Target/Alpha/AlphaRegisterInfo.h
index 1072bf7..e35be27 100644
--- a/contrib/llvm/lib/Target/Alpha/AlphaRegisterInfo.h
+++ b/contrib/llvm/lib/Target/Alpha/AlphaRegisterInfo.h
@@ -42,16 +42,12 @@ struct AlphaRegisterInfo : public AlphaGenRegisterInfo {
int SPAdj, RegScavenger *RS = NULL) const;
// Debug information queries.
- unsigned getRARegister() const;
unsigned getFrameRegister(const MachineFunction &MF) const;
// Exception handling queries.
unsigned getEHExceptionRegister() const;
unsigned getEHHandlerRegister() const;
- int getDwarfRegNum(unsigned RegNum, bool isEH) const;
- int getLLVMRegNum(unsigned RegNum, bool isEH) const;
-
static std::string getPrettyName(unsigned reg);
};
diff --git a/contrib/llvm/lib/Target/Alpha/AlphaSubtarget.cpp b/contrib/llvm/lib/Target/Alpha/AlphaSubtarget.cpp
index 624a5e2..bd55ce9 100644
--- a/contrib/llvm/lib/Target/Alpha/AlphaSubtarget.cpp
+++ b/contrib/llvm/lib/Target/Alpha/AlphaSubtarget.cpp
@@ -13,7 +13,6 @@
#include "AlphaSubtarget.h"
#include "Alpha.h"
-#include "llvm/Target/TargetRegistry.h"
#define GET_SUBTARGETINFO_TARGET_DESC
#define GET_SUBTARGETINFO_CTOR
diff --git a/contrib/llvm/lib/Target/Alpha/AlphaTargetMachine.cpp b/contrib/llvm/lib/Target/Alpha/AlphaTargetMachine.cpp
index 3b65d41..fc9a677 100644
--- a/contrib/llvm/lib/Target/Alpha/AlphaTargetMachine.cpp
+++ b/contrib/llvm/lib/Target/Alpha/AlphaTargetMachine.cpp
@@ -14,7 +14,7 @@
#include "AlphaTargetMachine.h"
#include "llvm/PassManager.h"
#include "llvm/Support/FormattedStream.h"
-#include "llvm/Target/TargetRegistry.h"
+#include "llvm/Support/TargetRegistry.h"
using namespace llvm;
extern "C" void LLVMInitializeAlphaTarget() {
@@ -22,19 +22,17 @@ extern "C" void LLVMInitializeAlphaTarget() {
RegisterTargetMachine<AlphaTargetMachine> X(TheAlphaTarget);
}
-AlphaTargetMachine::AlphaTargetMachine(const Target &T, const std::string &TT,
- const std::string &CPU,
- const std::string &FS)
- : LLVMTargetMachine(T, TT, CPU, FS),
+AlphaTargetMachine::AlphaTargetMachine(const Target &T, StringRef TT,
+ StringRef CPU, StringRef FS,
+ Reloc::Model RM, CodeModel::Model CM)
+ : LLVMTargetMachine(T, TT, CPU, FS, RM, CM),
DataLayout("e-f128:128:128-n64"),
FrameLowering(Subtarget),
Subtarget(TT, CPU, FS),
TLInfo(*this),
TSInfo(*this) {
- setRelocationModel(Reloc::PIC_);
}
-
//===----------------------------------------------------------------------===//
// Pass Pipeline Configuration
//===----------------------------------------------------------------------===//
diff --git a/contrib/llvm/lib/Target/Alpha/AlphaTargetMachine.h b/contrib/llvm/lib/Target/Alpha/AlphaTargetMachine.h
index cf00e58..48bb948 100644
--- a/contrib/llvm/lib/Target/Alpha/AlphaTargetMachine.h
+++ b/contrib/llvm/lib/Target/Alpha/AlphaTargetMachine.h
@@ -36,8 +36,9 @@ class AlphaTargetMachine : public LLVMTargetMachine {
AlphaSelectionDAGInfo TSInfo;
public:
- AlphaTargetMachine(const Target &T, const std::string &TT,
- const std::string &CPU, const std::string &FS);
+ AlphaTargetMachine(const Target &T, StringRef TT,
+ StringRef CPU, StringRef FS,
+ Reloc::Model RM, CodeModel::Model CM);
virtual const AlphaInstrInfo *getInstrInfo() const { return &InstrInfo; }
virtual const TargetFrameLowering *getFrameLowering() const {
diff --git a/contrib/llvm/lib/Target/Alpha/MCTargetDesc/AlphaMCTargetDesc.cpp b/contrib/llvm/lib/Target/Alpha/MCTargetDesc/AlphaMCTargetDesc.cpp
index 562052b..4ad021c 100644
--- a/contrib/llvm/lib/Target/Alpha/MCTargetDesc/AlphaMCTargetDesc.cpp
+++ b/contrib/llvm/lib/Target/Alpha/MCTargetDesc/AlphaMCTargetDesc.cpp
@@ -13,10 +13,11 @@
#include "AlphaMCTargetDesc.h"
#include "AlphaMCAsmInfo.h"
+#include "llvm/MC/MCCodeGenInfo.h"
#include "llvm/MC/MCInstrInfo.h"
#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/MC/MCSubtargetInfo.h"
-#include "llvm/Target/TargetRegistry.h"
+#include "llvm/Support/TargetRegistry.h"
#define GET_INSTRINFO_MC_DESC
#include "AlphaGenInstrInfo.inc"
@@ -36,8 +37,10 @@ static MCInstrInfo *createAlphaMCInstrInfo() {
return X;
}
-extern "C" void LLVMInitializeAlphaMCInstrInfo() {
- TargetRegistry::RegisterMCInstrInfo(TheAlphaTarget, createAlphaMCInstrInfo);
+static MCRegisterInfo *createAlphaMCRegisterInfo(StringRef TT) {
+ MCRegisterInfo *X = new MCRegisterInfo();
+ InitAlphaMCRegisterInfo(X, Alpha::R26);
+ return X;
}
static MCSubtargetInfo *createAlphaMCSubtargetInfo(StringRef TT, StringRef CPU,
@@ -47,11 +50,29 @@ static MCSubtargetInfo *createAlphaMCSubtargetInfo(StringRef TT, StringRef CPU,
return X;
}
-extern "C" void LLVMInitializeAlphaMCSubtargetInfo() {
- TargetRegistry::RegisterMCSubtargetInfo(TheAlphaTarget,
- createAlphaMCSubtargetInfo);
+static MCCodeGenInfo *createAlphaMCCodeGenInfo(StringRef TT, Reloc::Model RM,
+ CodeModel::Model CM) {
+ MCCodeGenInfo *X = new MCCodeGenInfo();
+ X->InitMCCodeGenInfo(Reloc::PIC_, CM);
+ return X;
}
-extern "C" void LLVMInitializeAlphaMCAsmInfo() {
+// Force static initialization.
+extern "C" void LLVMInitializeAlphaTargetMC() {
+ // Register the MC asm info.
RegisterMCAsmInfo<AlphaMCAsmInfo> X(TheAlphaTarget);
+
+ // Register the MC codegen info.
+ TargetRegistry::RegisterMCCodeGenInfo(TheAlphaTarget,
+ createAlphaMCCodeGenInfo);
+
+ // Register the MC instruction info.
+ TargetRegistry::RegisterMCInstrInfo(TheAlphaTarget, createAlphaMCInstrInfo);
+
+ // Register the MC register info.
+ TargetRegistry::RegisterMCRegInfo(TheAlphaTarget, createAlphaMCRegisterInfo);
+
+ // Register the MC subtarget info.
+ TargetRegistry::RegisterMCSubtargetInfo(TheAlphaTarget,
+ createAlphaMCSubtargetInfo);
}
diff --git a/contrib/llvm/lib/Target/Alpha/MCTargetDesc/CMakeLists.txt b/contrib/llvm/lib/Target/Alpha/MCTargetDesc/CMakeLists.txt
deleted file mode 100644
index ad0dd26..0000000
--- a/contrib/llvm/lib/Target/Alpha/MCTargetDesc/CMakeLists.txt
+++ /dev/null
@@ -1,4 +0,0 @@
-add_llvm_library(LLVMAlphaDesc
- AlphaMCTargetDesc.cpp
- AlphaMCAsmInfo.cpp
- )
diff --git a/contrib/llvm/lib/Target/Alpha/MCTargetDesc/Makefile b/contrib/llvm/lib/Target/Alpha/MCTargetDesc/Makefile
deleted file mode 100644
index d55175f..0000000
--- a/contrib/llvm/lib/Target/Alpha/MCTargetDesc/Makefile
+++ /dev/null
@@ -1,16 +0,0 @@
-##===- lib/Target/Alpha/TargetDesc/Makefile ----------------*- Makefile -*-===##
-#
-# The LLVM Compiler Infrastructure
-#
-# This file is distributed under the University of Illinois Open Source
-# License. See LICENSE.TXT for details.
-#
-##===----------------------------------------------------------------------===##
-
-LEVEL = ../../../..
-LIBRARYNAME = LLVMAlphaDesc
-
-# Hack: we need to include 'main' target directory to grab private headers
-CPP.Flags += -I$(PROJ_OBJ_DIR)/.. -I$(PROJ_SRC_DIR)/..
-
-include $(LEVEL)/Makefile.common
diff --git a/contrib/llvm/lib/Target/Alpha/TargetInfo/AlphaTargetInfo.cpp b/contrib/llvm/lib/Target/Alpha/TargetInfo/AlphaTargetInfo.cpp
index f7099b9..bdc69e7 100644
--- a/contrib/llvm/lib/Target/Alpha/TargetInfo/AlphaTargetInfo.cpp
+++ b/contrib/llvm/lib/Target/Alpha/TargetInfo/AlphaTargetInfo.cpp
@@ -9,7 +9,7 @@
#include "Alpha.h"
#include "llvm/Module.h"
-#include "llvm/Target/TargetRegistry.h"
+#include "llvm/Support/TargetRegistry.h"
using namespace llvm;
llvm::Target llvm::TheAlphaTarget;
diff --git a/contrib/llvm/lib/Target/Blackfin/BlackfinAsmPrinter.cpp b/contrib/llvm/lib/Target/Blackfin/BlackfinAsmPrinter.cpp
index 6ba258b..ed9844e 100644
--- a/contrib/llvm/lib/Target/Blackfin/BlackfinAsmPrinter.cpp
+++ b/contrib/llvm/lib/Target/Blackfin/BlackfinAsmPrinter.cpp
@@ -29,9 +29,9 @@
#include "llvm/Target/Mangler.h"
#include "llvm/Target/TargetData.h"
#include "llvm/Target/TargetLoweringObjectFile.h"
-#include "llvm/Target/TargetRegistry.h"
#include "llvm/ADT/SmallString.h"
#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/TargetRegistry.h"
#include "llvm/Support/raw_ostream.h"
using namespace llvm;
diff --git a/contrib/llvm/lib/Target/Blackfin/BlackfinFrameLowering.h b/contrib/llvm/lib/Target/Blackfin/BlackfinFrameLowering.h
index 726fa2c..169aa8e 100644
--- a/contrib/llvm/lib/Target/Blackfin/BlackfinFrameLowering.h
+++ b/contrib/llvm/lib/Target/Blackfin/BlackfinFrameLowering.h
@@ -11,8 +11,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef ALPHA_FRAMEINFO_H
-#define ALPHA_FRAMEINFO_H
+#ifndef BLACKFIN_FRAMEINFO_H
+#define BLACKFIN_FRAMEINFO_H
#include "Blackfin.h"
#include "BlackfinSubtarget.h"
diff --git a/contrib/llvm/lib/Target/Blackfin/BlackfinISelLowering.cpp b/contrib/llvm/lib/Target/Blackfin/BlackfinISelLowering.cpp
index d572832..7d4c45f 100644
--- a/contrib/llvm/lib/Target/Blackfin/BlackfinISelLowering.cpp
+++ b/contrib/llvm/lib/Target/Blackfin/BlackfinISelLowering.cpp
@@ -42,6 +42,7 @@ using namespace llvm;
BlackfinTargetLowering::BlackfinTargetLowering(TargetMachine &TM)
: TargetLowering(TM, new TargetLoweringObjectFileELF()) {
setBooleanContents(ZeroOrOneBooleanContent);
+ setBooleanVectorContents(ZeroOrOneBooleanContent); // FIXME: Is this correct?
setStackPointerRegisterToSaveRestore(BF::SP);
setIntDivIsCheap(false);
@@ -99,6 +100,7 @@ BlackfinTargetLowering::BlackfinTargetLowering(TargetMachine &TM)
// Blackfin has no intrinsics for these particular operations.
setOperationAction(ISD::MEMBARRIER, MVT::Other, Expand);
+ setOperationAction(ISD::ATOMIC_FENCE, MVT::Other, Expand);
setOperationAction(ISD::BSWAP, MVT::i32, Expand);
setOperationAction(ISD::SHL_PARTS, MVT::i32, Expand);
@@ -134,7 +136,7 @@ const char *BlackfinTargetLowering::getTargetNodeName(unsigned Opcode) const {
}
}
-MVT::SimpleValueType BlackfinTargetLowering::getSetCCResultType(EVT VT) const {
+EVT BlackfinTargetLowering::getSetCCResultType(EVT VT) const {
// SETCC always sets the CC register. Technically that is an i1 register, but
// that type is not legal, so we treat it as an i32 register.
return MVT::i32;
diff --git a/contrib/llvm/lib/Target/Blackfin/BlackfinISelLowering.h b/contrib/llvm/lib/Target/Blackfin/BlackfinISelLowering.h
index b65775b..90908ba 100644
--- a/contrib/llvm/lib/Target/Blackfin/BlackfinISelLowering.h
+++ b/contrib/llvm/lib/Target/Blackfin/BlackfinISelLowering.h
@@ -33,7 +33,7 @@ namespace llvm {
public:
BlackfinTargetLowering(TargetMachine &TM);
virtual MVT getShiftAmountTy(EVT LHSTy) const { return MVT::i16; }
- virtual MVT::SimpleValueType getSetCCResultType(EVT VT) const;
+ virtual EVT getSetCCResultType(EVT VT) const;
virtual SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const;
virtual void ReplaceNodeResults(SDNode *N,
SmallVectorImpl<SDValue> &Results,
diff --git a/contrib/llvm/lib/Target/Blackfin/BlackfinInstrInfo.cpp b/contrib/llvm/lib/Target/Blackfin/BlackfinInstrInfo.cpp
index d190ae7..c06a919 100644
--- a/contrib/llvm/lib/Target/Blackfin/BlackfinInstrInfo.cpp
+++ b/contrib/llvm/lib/Target/Blackfin/BlackfinInstrInfo.cpp
@@ -16,10 +16,10 @@
#include "Blackfin.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
-#include "llvm/Target/TargetRegistry.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/TargetRegistry.h"
#define GET_INSTRINFO_CTOR
#include "BlackfinGenInstrInfo.inc"
diff --git a/contrib/llvm/lib/Target/Blackfin/BlackfinIntrinsicInfo.cpp b/contrib/llvm/lib/Target/Blackfin/BlackfinIntrinsicInfo.cpp
index ae8ee9e..7135676 100644
--- a/contrib/llvm/lib/Target/Blackfin/BlackfinIntrinsicInfo.cpp
+++ b/contrib/llvm/lib/Target/Blackfin/BlackfinIntrinsicInfo.cpp
@@ -34,7 +34,7 @@ namespace bfinIntrinsic {
}
-std::string BlackfinIntrinsicInfo::getName(unsigned IntrID, const Type **Tys,
+std::string BlackfinIntrinsicInfo::getName(unsigned IntrID, Type **Tys,
unsigned numTys) const {
static const char *const names[] = {
#define GET_INTRINSIC_NAME_TABLE
@@ -81,8 +81,8 @@ bool BlackfinIntrinsicInfo::isOverloaded(unsigned IntrID) const {
#include "BlackfinGenIntrinsics.inc"
#undef GET_INTRINSIC_ATTRIBUTES
-static const FunctionType *getType(LLVMContext &Context, unsigned id) {
- const Type *ResultTy = NULL;
+static FunctionType *getType(LLVMContext &Context, unsigned id) {
+ Type *ResultTy = NULL;
std::vector<Type*> ArgTys;
bool IsVarArg = false;
@@ -94,7 +94,7 @@ static const FunctionType *getType(LLVMContext &Context, unsigned id) {
}
Function *BlackfinIntrinsicInfo::getDeclaration(Module *M, unsigned IntrID,
- const Type **Tys,
+ Type **Tys,
unsigned numTy) const {
assert(!isOverloaded(IntrID) && "Blackfin intrinsics are not overloaded");
AttrListPtr AList = getAttributes((bfinIntrinsic::ID) IntrID);
diff --git a/contrib/llvm/lib/Target/Blackfin/BlackfinIntrinsicInfo.h b/contrib/llvm/lib/Target/Blackfin/BlackfinIntrinsicInfo.h
index 7c4b5a9..f05db5a 100644
--- a/contrib/llvm/lib/Target/Blackfin/BlackfinIntrinsicInfo.h
+++ b/contrib/llvm/lib/Target/Blackfin/BlackfinIntrinsicInfo.h
@@ -19,11 +19,11 @@ namespace llvm {
class BlackfinIntrinsicInfo : public TargetIntrinsicInfo {
public:
- std::string getName(unsigned IntrID, const Type **Tys = 0,
+ std::string getName(unsigned IntrID, Type **Tys = 0,
unsigned numTys = 0) const;
unsigned lookupName(const char *Name, unsigned Len) const;
bool isOverloaded(unsigned IID) const;
- Function *getDeclaration(Module *M, unsigned ID, const Type **Tys = 0,
+ Function *getDeclaration(Module *M, unsigned ID, Type **Tys = 0,
unsigned numTys = 0) const;
};
diff --git a/contrib/llvm/lib/Target/Blackfin/BlackfinRegisterInfo.cpp b/contrib/llvm/lib/Target/Blackfin/BlackfinRegisterInfo.cpp
index 3a7c104..0d415c5 100644
--- a/contrib/llvm/lib/Target/Blackfin/BlackfinRegisterInfo.cpp
+++ b/contrib/llvm/lib/Target/Blackfin/BlackfinRegisterInfo.cpp
@@ -20,7 +20,6 @@
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
-#include "llvm/CodeGen/MachineLocation.h"
#include "llvm/CodeGen/RegisterScavenging.h"
#include "llvm/Target/TargetFrameLowering.h"
#include "llvm/Target/TargetMachine.h"
@@ -37,7 +36,7 @@ using namespace llvm;
BlackfinRegisterInfo::BlackfinRegisterInfo(BlackfinSubtarget &st,
const TargetInstrInfo &tii)
- : BlackfinGenRegisterInfo(), Subtarget(st), TII(tii) {}
+ : BlackfinGenRegisterInfo(BF::RETS), Subtarget(st), TII(tii) {}
const unsigned*
BlackfinRegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const {
@@ -327,10 +326,6 @@ BlackfinRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
}
}
-unsigned BlackfinRegisterInfo::getRARegister() const {
- return BF::RETS;
-}
-
unsigned
BlackfinRegisterInfo::getFrameRegister(const MachineFunction &MF) const {
const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering();
@@ -347,14 +342,3 @@ unsigned BlackfinRegisterInfo::getEHHandlerRegister() const {
llvm_unreachable("What is the exception handler register");
return 0;
}
-
-int BlackfinRegisterInfo::getDwarfRegNum(unsigned RegNum, bool isEH) const {
- llvm_unreachable("What is the dwarf register number");
- return -1;
-}
-
-int BlackfinRegisterInfo::getLLVMRegNum(unsigned DwarfRegNum,
- bool isEH) const {
- llvm_unreachable("What is the dwarf register number");
- return -1;
-}
diff --git a/contrib/llvm/lib/Target/Blackfin/BlackfinRegisterInfo.h b/contrib/llvm/lib/Target/Blackfin/BlackfinRegisterInfo.h
index 86f45c1..6ac22af 100644
--- a/contrib/llvm/lib/Target/Blackfin/BlackfinRegisterInfo.h
+++ b/contrib/llvm/lib/Target/Blackfin/BlackfinRegisterInfo.h
@@ -53,15 +53,11 @@ namespace llvm {
int SPAdj, RegScavenger *RS = NULL) const;
unsigned getFrameRegister(const MachineFunction &MF) const;
- unsigned getRARegister() const;
// Exception handling queries.
unsigned getEHExceptionRegister() const;
unsigned getEHHandlerRegister() const;
- int getDwarfRegNum(unsigned RegNum, bool isEH) const;
- int getLLVMRegNum(unsigned RegNum, bool isEH) const;
-
// Utility functions
void adjustRegister(MachineBasicBlock &MBB,
MachineBasicBlock::iterator I,
diff --git a/contrib/llvm/lib/Target/Blackfin/BlackfinSubtarget.cpp b/contrib/llvm/lib/Target/Blackfin/BlackfinSubtarget.cpp
index ec919cd..0bdce09 100644
--- a/contrib/llvm/lib/Target/Blackfin/BlackfinSubtarget.cpp
+++ b/contrib/llvm/lib/Target/Blackfin/BlackfinSubtarget.cpp
@@ -13,7 +13,7 @@
#include "BlackfinSubtarget.h"
#include "Blackfin.h"
-#include "llvm/Target/TargetRegistry.h"
+#include "llvm/Support/TargetRegistry.h"
#define GET_SUBTARGETINFO_TARGET_DESC
#define GET_SUBTARGETINFO_CTOR
diff --git a/contrib/llvm/lib/Target/Blackfin/BlackfinTargetMachine.cpp b/contrib/llvm/lib/Target/Blackfin/BlackfinTargetMachine.cpp
index a1c9f1c..a4ae46b 100644
--- a/contrib/llvm/lib/Target/Blackfin/BlackfinTargetMachine.cpp
+++ b/contrib/llvm/lib/Target/Blackfin/BlackfinTargetMachine.cpp
@@ -13,7 +13,7 @@
#include "BlackfinTargetMachine.h"
#include "Blackfin.h"
#include "llvm/PassManager.h"
-#include "llvm/Target/TargetRegistry.h"
+#include "llvm/Support/TargetRegistry.h"
using namespace llvm;
@@ -22,10 +22,12 @@ extern "C" void LLVMInitializeBlackfinTarget() {
}
BlackfinTargetMachine::BlackfinTargetMachine(const Target &T,
- const std::string &TT,
- const std::string &CPU,
- const std::string &FS)
- : LLVMTargetMachine(T, TT, CPU, FS),
+ StringRef TT,
+ StringRef CPU,
+ StringRef FS,
+ Reloc::Model RM,
+ CodeModel::Model CM)
+ : LLVMTargetMachine(T, TT, CPU, FS, RM, CM),
DataLayout("e-p:32:32-i64:32-f64:32-n32"),
Subtarget(TT, CPU, FS),
TLInfo(*this),
diff --git a/contrib/llvm/lib/Target/Blackfin/BlackfinTargetMachine.h b/contrib/llvm/lib/Target/Blackfin/BlackfinTargetMachine.h
index bd7dc84..c85337fe2 100644
--- a/contrib/llvm/lib/Target/Blackfin/BlackfinTargetMachine.h
+++ b/contrib/llvm/lib/Target/Blackfin/BlackfinTargetMachine.h
@@ -35,8 +35,9 @@ namespace llvm {
BlackfinFrameLowering FrameLowering;
BlackfinIntrinsicInfo IntrinsicInfo;
public:
- BlackfinTargetMachine(const Target &T, const std::string &TT,
- const std::string &CPU, const std::string &FS);
+ BlackfinTargetMachine(const Target &T, StringRef TT,
+ StringRef CPU, StringRef FS,
+ Reloc::Model RM, CodeModel::Model CM);
virtual const BlackfinInstrInfo *getInstrInfo() const { return &InstrInfo; }
virtual const TargetFrameLowering *getFrameLowering() const {
diff --git a/contrib/llvm/lib/Target/Blackfin/MCTargetDesc/BlackfinMCTargetDesc.cpp b/contrib/llvm/lib/Target/Blackfin/MCTargetDesc/BlackfinMCTargetDesc.cpp
index 0fa1471..272e3c2 100644
--- a/contrib/llvm/lib/Target/Blackfin/MCTargetDesc/BlackfinMCTargetDesc.cpp
+++ b/contrib/llvm/lib/Target/Blackfin/MCTargetDesc/BlackfinMCTargetDesc.cpp
@@ -13,10 +13,11 @@
#include "BlackfinMCTargetDesc.h"
#include "BlackfinMCAsmInfo.h"
+#include "llvm/MC/MCCodeGenInfo.h"
#include "llvm/MC/MCInstrInfo.h"
#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/MC/MCSubtargetInfo.h"
-#include "llvm/Target/TargetRegistry.h"
+#include "llvm/Support/TargetRegistry.h"
#define GET_INSTRINFO_MC_DESC
#include "BlackfinGenInstrInfo.inc"
@@ -36,12 +37,12 @@ static MCInstrInfo *createBlackfinMCInstrInfo() {
return X;
}
-extern "C" void LLVMInitializeBlackfinMCInstrInfo() {
- TargetRegistry::RegisterMCInstrInfo(TheBlackfinTarget,
- createBlackfinMCInstrInfo);
+static MCRegisterInfo *createBlackfinMCRegisterInfo(StringRef TT) {
+ MCRegisterInfo *X = new MCRegisterInfo();
+ InitBlackfinMCRegisterInfo(X, BF::RETS);
+ return X;
}
-
static MCSubtargetInfo *createBlackfinMCSubtargetInfo(StringRef TT,
StringRef CPU,
StringRef FS) {
@@ -50,11 +51,31 @@ static MCSubtargetInfo *createBlackfinMCSubtargetInfo(StringRef TT,
return X;
}
-extern "C" void LLVMInitializeBlackfinMCSubtargetInfo() {
- TargetRegistry::RegisterMCSubtargetInfo(TheBlackfinTarget,
- createBlackfinMCSubtargetInfo);
+static MCCodeGenInfo *createBlackfinMCCodeGenInfo(StringRef TT, Reloc::Model RM,
+ CodeModel::Model CM) {
+ MCCodeGenInfo *X = new MCCodeGenInfo();
+ X->InitMCCodeGenInfo(RM, CM);
+ return X;
}
-extern "C" void LLVMInitializeBlackfinMCAsmInfo() {
+// Force static initialization.
+extern "C" void LLVMInitializeBlackfinTargetMC() {
+ // Register the MC asm info.
RegisterMCAsmInfo<BlackfinMCAsmInfo> X(TheBlackfinTarget);
+
+ // Register the MC codegen info.
+ TargetRegistry::RegisterMCCodeGenInfo(TheBlackfinTarget,
+ createBlackfinMCCodeGenInfo);
+
+ // Register the MC instruction info.
+ TargetRegistry::RegisterMCInstrInfo(TheBlackfinTarget,
+ createBlackfinMCInstrInfo);
+
+ // Register the MC register info.
+ TargetRegistry::RegisterMCRegInfo(TheBlackfinTarget,
+ createBlackfinMCRegisterInfo);
+
+ // Register the MC subtarget info.
+ TargetRegistry::RegisterMCSubtargetInfo(TheBlackfinTarget,
+ createBlackfinMCSubtargetInfo);
}
diff --git a/contrib/llvm/lib/Target/Blackfin/MCTargetDesc/CMakeLists.txt b/contrib/llvm/lib/Target/Blackfin/MCTargetDesc/CMakeLists.txt
deleted file mode 100644
index 8cd924f..0000000
--- a/contrib/llvm/lib/Target/Blackfin/MCTargetDesc/CMakeLists.txt
+++ /dev/null
@@ -1,4 +0,0 @@
-add_llvm_library(LLVMBlackfinDesc
- BlackfinMCTargetDesc.cpp
- BlackfinMCAsmInfo.cpp
- )
diff --git a/contrib/llvm/lib/Target/Blackfin/MCTargetDesc/Makefile b/contrib/llvm/lib/Target/Blackfin/MCTargetDesc/Makefile
deleted file mode 100644
index 6b26101..0000000
--- a/contrib/llvm/lib/Target/Blackfin/MCTargetDesc/Makefile
+++ /dev/null
@@ -1,16 +0,0 @@
-##===- lib/Target/Blackfin/TargetDesc/Makefile -------------*- Makefile -*-===##
-#
-# The LLVM Compiler Infrastructure
-#
-# This file is distributed under the University of Illinois Open Source
-# License. See LICENSE.TXT for details.
-#
-##===----------------------------------------------------------------------===##
-
-LEVEL = ../../../..
-LIBRARYNAME = LLVMBlackfinDesc
-
-# Hack: we need to include 'main' target directory to grab private headers
-CPP.Flags += -I$(PROJ_OBJ_DIR)/.. -I$(PROJ_SRC_DIR)/..
-
-include $(LEVEL)/Makefile.common
diff --git a/contrib/llvm/lib/Target/Blackfin/TargetInfo/BlackfinTargetInfo.cpp b/contrib/llvm/lib/Target/Blackfin/TargetInfo/BlackfinTargetInfo.cpp
index 402e0af..57f1d3e 100644
--- a/contrib/llvm/lib/Target/Blackfin/TargetInfo/BlackfinTargetInfo.cpp
+++ b/contrib/llvm/lib/Target/Blackfin/TargetInfo/BlackfinTargetInfo.cpp
@@ -9,7 +9,7 @@
#include "Blackfin.h"
#include "llvm/Module.h"
-#include "llvm/Target/TargetRegistry.h"
+#include "llvm/Support/TargetRegistry.h"
using namespace llvm;
diff --git a/contrib/llvm/lib/Target/CBackend/CBackend.cpp b/contrib/llvm/lib/Target/CBackend/CBackend.cpp
index 415beb1..69d8c46 100644
--- a/contrib/llvm/lib/Target/CBackend/CBackend.cpp
+++ b/contrib/llvm/lib/Target/CBackend/CBackend.cpp
@@ -37,10 +37,11 @@
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCInstrInfo.h"
+#include "llvm/MC/MCObjectFileInfo.h"
+#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/MC/MCSubtargetInfo.h"
#include "llvm/MC/MCSymbol.h"
#include "llvm/Target/TargetData.h"
-#include "llvm/Target/TargetRegistry.h"
#include "llvm/Support/CallSite.h"
#include "llvm/Support/CFG.h"
#include "llvm/Support/ErrorHandling.h"
@@ -48,6 +49,7 @@
#include "llvm/Support/GetElementPtrTypeIterator.h"
#include "llvm/Support/InstVisitor.h"
#include "llvm/Support/MathExtras.h"
+#include "llvm/Support/TargetRegistry.h"
#include "llvm/Support/Host.h"
#include "llvm/Config/config.h"
#include <algorithm>
@@ -62,12 +64,6 @@ extern "C" void LLVMInitializeCBackendTarget() {
RegisterTargetMachine<CTargetMachine> X(TheCBackendTarget);
}
-extern "C" void LLVMInitializeCBackendMCAsmInfo() {}
-
-extern "C" void LLVMInitializeCBackendMCInstrInfo() {}
-
-extern "C" void LLVMInitializeCBackendMCSubtargetInfo() {}
-
namespace {
class CBEMCAsmInfo : public MCAsmInfo {
public:
@@ -86,6 +82,8 @@ namespace {
LoopInfo *LI;
const Module *TheModule;
const MCAsmInfo* TAsm;
+ const MCRegisterInfo *MRI;
+ const MCObjectFileInfo *MOFI;
MCContext *TCtx;
const TargetData* TD;
@@ -99,14 +97,14 @@ namespace {
/// UnnamedStructIDs - This contains a unique ID for each struct that is
/// either anonymous or has no name.
- DenseMap<const StructType*, unsigned> UnnamedStructIDs;
+ DenseMap<StructType*, unsigned> UnnamedStructIDs;
public:
static char ID;
explicit CWriter(formatted_raw_ostream &o)
: FunctionPass(ID), Out(o), IL(0), Mang(0), LI(0),
- TheModule(0), TAsm(0), TCtx(0), TD(0), OpaqueCounter(0),
- NextAnonValueNumber(0) {
+ TheModule(0), TAsm(0), MRI(0), MOFI(0), TCtx(0), TD(0),
+ OpaqueCounter(0), NextAnonValueNumber(0) {
initializeLoopInfoPass(*PassRegistry::getPassRegistry());
FPCounter = 0;
}
@@ -145,6 +143,8 @@ namespace {
delete Mang;
delete TCtx;
delete TAsm;
+ delete MRI;
+ delete MOFI;
FPConstantMap.clear();
ByValParams.clear();
intrinsicPrototypesAlreadyGenerated.clear();
@@ -152,20 +152,20 @@ namespace {
return false;
}
- raw_ostream &printType(raw_ostream &Out, const Type *Ty,
+ raw_ostream &printType(raw_ostream &Out, Type *Ty,
bool isSigned = false,
const std::string &VariableName = "",
bool IgnoreName = false,
const AttrListPtr &PAL = AttrListPtr());
- raw_ostream &printSimpleType(raw_ostream &Out, const Type *Ty,
+ raw_ostream &printSimpleType(raw_ostream &Out, Type *Ty,
bool isSigned,
const std::string &NameSoFar = "");
void printStructReturnPointerFunctionType(raw_ostream &Out,
const AttrListPtr &PAL,
- const PointerType *Ty);
+ PointerType *Ty);
- std::string getStructName(const StructType *ST);
+ std::string getStructName(StructType *ST);
/// writeOperandDeref - Print the result of dereferencing the specified
/// operand with '*'. This is equivalent to printing '*' then using
@@ -188,7 +188,7 @@ namespace {
void writeOperandWithCast(Value* Operand, const ICmpInst &I);
bool writeInstructionCast(const Instruction &I);
- void writeMemoryAccess(Value *Operand, const Type *OperandType,
+ void writeMemoryAccess(Value *Operand, Type *OperandType,
bool IsVolatile, unsigned Alignment);
private :
@@ -200,7 +200,7 @@ namespace {
void printIntrinsicDefinition(const Function &F, raw_ostream &Out);
void printModuleTypes();
- void printContainedStructs(const Type *Ty, SmallPtrSet<const Type *, 16> &);
+ void printContainedStructs(Type *Ty, SmallPtrSet<Type *, 16> &);
void printFloatingPointConstants(Function &F);
void printFloatingPointConstants(const Constant *C);
void printFunctionSignature(const Function *F, bool Prototype);
@@ -209,7 +209,7 @@ namespace {
void printBasicBlock(BasicBlock *BB);
void printLoop(Loop *L);
- void printCast(unsigned opcode, const Type *SrcTy, const Type *DstTy);
+ void printCast(unsigned opcode, Type *SrcTy, Type *DstTy);
void printConstant(Constant *CPV, bool Static);
void printConstantWithCast(Constant *CPV, unsigned Opcode);
bool printConstExprCast(const ConstantExpr *CE, bool Static);
@@ -288,10 +288,12 @@ namespace {
void visitInvokeInst(InvokeInst &I) {
llvm_unreachable("Lowerinvoke pass didn't work!");
}
-
void visitUnwindInst(UnwindInst &I) {
llvm_unreachable("Lowerinvoke pass didn't work!");
}
+ void visitResumeInst(ResumeInst &I) {
+ llvm_unreachable("DwarfEHPrepare pass didn't work!");
+ }
void visitUnreachableInst(UnreachableInst &I);
void visitPHINode(PHINode &I);
@@ -360,8 +362,8 @@ static std::string CBEMangle(const std::string &S) {
return Result;
}
-std::string CWriter::getStructName(const StructType *ST) {
- if (!ST->isAnonymous() && !ST->getName().empty())
+std::string CWriter::getStructName(StructType *ST) {
+ if (!ST->isLiteral() && !ST->getName().empty())
return CBEMangle("l_"+ST->getName().str());
return "l_unnamed_" + utostr(UnnamedStructIDs[ST]);
@@ -373,20 +375,20 @@ std::string CWriter::getStructName(const StructType *ST) {
/// print it as "Struct (*)(...)", for struct return functions.
void CWriter::printStructReturnPointerFunctionType(raw_ostream &Out,
const AttrListPtr &PAL,
- const PointerType *TheTy) {
- const FunctionType *FTy = cast<FunctionType>(TheTy->getElementType());
+ PointerType *TheTy) {
+ FunctionType *FTy = cast<FunctionType>(TheTy->getElementType());
std::string tstr;
raw_string_ostream FunctionInnards(tstr);
FunctionInnards << " (*) (";
bool PrintedType = false;
FunctionType::param_iterator I = FTy->param_begin(), E = FTy->param_end();
- const Type *RetTy = cast<PointerType>(*I)->getElementType();
+ Type *RetTy = cast<PointerType>(*I)->getElementType();
unsigned Idx = 1;
for (++I, ++Idx; I != E; ++I, ++Idx) {
if (PrintedType)
FunctionInnards << ", ";
- const Type *ArgTy = *I;
+ Type *ArgTy = *I;
if (PAL.paramHasAttr(Idx, Attribute::ByVal)) {
assert(ArgTy->isPointerTy());
ArgTy = cast<PointerType>(ArgTy)->getElementType();
@@ -408,7 +410,7 @@ void CWriter::printStructReturnPointerFunctionType(raw_ostream &Out,
}
raw_ostream &
-CWriter::printSimpleType(raw_ostream &Out, const Type *Ty, bool isSigned,
+CWriter::printSimpleType(raw_ostream &Out, Type *Ty, bool isSigned,
const std::string &NameSoFar) {
assert((Ty->isPrimitiveType() || Ty->isIntegerTy() || Ty->isVectorTy()) &&
"Invalid type for printSimpleType");
@@ -444,7 +446,7 @@ CWriter::printSimpleType(raw_ostream &Out, const Type *Ty, bool isSigned,
" __attribute__((vector_size(64))) " + NameSoFar);
case Type::VectorTyID: {
- const VectorType *VTy = cast<VectorType>(Ty);
+ VectorType *VTy = cast<VectorType>(Ty);
return printSimpleType(Out, VTy->getElementType(), isSigned,
" __attribute__((vector_size(" +
utostr(TD->getTypeAllocSize(VTy)) + " ))) " + NameSoFar);
@@ -461,7 +463,7 @@ CWriter::printSimpleType(raw_ostream &Out, const Type *Ty, bool isSigned,
// Pass the Type* and the variable name and this prints out the variable
// declaration.
//
-raw_ostream &CWriter::printType(raw_ostream &Out, const Type *Ty,
+raw_ostream &CWriter::printType(raw_ostream &Out, Type *Ty,
bool isSigned, const std::string &NameSoFar,
bool IgnoreName, const AttrListPtr &PAL) {
if (Ty->isPrimitiveType() || Ty->isIntegerTy() || Ty->isVectorTy()) {
@@ -471,14 +473,14 @@ raw_ostream &CWriter::printType(raw_ostream &Out, const Type *Ty,
switch (Ty->getTypeID()) {
case Type::FunctionTyID: {
- const FunctionType *FTy = cast<FunctionType>(Ty);
+ FunctionType *FTy = cast<FunctionType>(Ty);
std::string tstr;
raw_string_ostream FunctionInnards(tstr);
FunctionInnards << " (" << NameSoFar << ") (";
unsigned Idx = 1;
for (FunctionType::param_iterator I = FTy->param_begin(),
E = FTy->param_end(); I != E; ++I) {
- const Type *ArgTy = *I;
+ Type *ArgTy = *I;
if (PAL.paramHasAttr(Idx, Attribute::ByVal)) {
assert(ArgTy->isPointerTy());
ArgTy = cast<PointerType>(ArgTy)->getElementType();
@@ -502,7 +504,7 @@ raw_ostream &CWriter::printType(raw_ostream &Out, const Type *Ty,
return Out;
}
case Type::StructTyID: {
- const StructType *STy = cast<StructType>(Ty);
+ StructType *STy = cast<StructType>(Ty);
// Check to see if the type is named.
if (!IgnoreName)
@@ -523,7 +525,7 @@ raw_ostream &CWriter::printType(raw_ostream &Out, const Type *Ty,
}
case Type::PointerTyID: {
- const PointerType *PTy = cast<PointerType>(Ty);
+ PointerType *PTy = cast<PointerType>(Ty);
std::string ptrName = "*" + NameSoFar;
if (PTy->getElementType()->isArrayTy() ||
@@ -537,7 +539,7 @@ raw_ostream &CWriter::printType(raw_ostream &Out, const Type *Ty,
}
case Type::ArrayTyID: {
- const ArrayType *ATy = cast<ArrayType>(Ty);
+ ArrayType *ATy = cast<ArrayType>(Ty);
unsigned NumElements = ATy->getNumElements();
if (NumElements == 0) NumElements = 1;
// Arrays are wrapped in structs to allow them to have normal
@@ -560,7 +562,7 @@ void CWriter::printConstantArray(ConstantArray *CPA, bool Static) {
// As a special case, print the array as a string if it is an array of
// ubytes or an array of sbytes with positive values.
//
- const Type *ETy = CPA->getType()->getElementType();
+ Type *ETy = CPA->getType()->getElementType();
bool isString = (ETy == Type::getInt8Ty(CPA->getContext()) ||
ETy == Type::getInt8Ty(CPA->getContext()));
@@ -682,7 +684,7 @@ static bool isFPCSafeToPrint(const ConstantFP *CFP) {
/// Print out the casting for a cast operation. This does the double casting
/// necessary for conversion to the destination type, if necessary.
/// @brief Print a cast
-void CWriter::printCast(unsigned opc, const Type *SrcTy, const Type *DstTy) {
+void CWriter::printCast(unsigned opc, Type *SrcTy, Type *DstTy) {
// Print the destination type cast
switch (opc) {
case Instruction::UIToFP:
@@ -917,7 +919,7 @@ void CWriter::printConstant(Constant *CPV, bool Static) {
}
if (ConstantInt *CI = dyn_cast<ConstantInt>(CPV)) {
- const Type* Ty = CI->getType();
+ Type* Ty = CI->getType();
if (Ty == Type::getInt1Ty(CPV->getContext()))
Out << (CI->getZExtValue() ? '1' : '0');
else if (Ty == Type::getInt32Ty(CPV->getContext()))
@@ -1027,7 +1029,7 @@ void CWriter::printConstant(Constant *CPV, bool Static) {
printConstantArray(CA, Static);
} else {
assert(isa<ConstantAggregateZero>(CPV) || isa<UndefValue>(CPV));
- const ArrayType *AT = cast<ArrayType>(CPV->getType());
+ ArrayType *AT = cast<ArrayType>(CPV->getType());
Out << '{';
if (AT->getNumElements()) {
Out << ' ';
@@ -1054,7 +1056,7 @@ void CWriter::printConstant(Constant *CPV, bool Static) {
printConstantVector(CV, Static);
} else {
assert(isa<ConstantAggregateZero>(CPV) || isa<UndefValue>(CPV));
- const VectorType *VT = cast<VectorType>(CPV->getType());
+ VectorType *VT = cast<VectorType>(CPV->getType());
Out << "{ ";
Constant *CZ = Constant::getNullValue(VT->getElementType());
printConstant(CZ, Static);
@@ -1074,7 +1076,7 @@ void CWriter::printConstant(Constant *CPV, bool Static) {
Out << ")";
}
if (isa<ConstantAggregateZero>(CPV) || isa<UndefValue>(CPV)) {
- const StructType *ST = cast<StructType>(CPV->getType());
+ StructType *ST = cast<StructType>(CPV->getType());
Out << '{';
if (ST->getNumElements()) {
Out << ' ';
@@ -1123,7 +1125,7 @@ void CWriter::printConstant(Constant *CPV, bool Static) {
// care of detecting that case and printing the cast for the ConstantExpr.
bool CWriter::printConstExprCast(const ConstantExpr* CE, bool Static) {
bool NeedsExplicitCast = false;
- const Type *Ty = CE->getOperand(0)->getType();
+ Type *Ty = CE->getOperand(0)->getType();
bool TypeIsSigned = false;
switch (CE->getOpcode()) {
case Instruction::Add:
@@ -1175,7 +1177,7 @@ bool CWriter::printConstExprCast(const ConstantExpr* CE, bool Static) {
void CWriter::printConstantWithCast(Constant* CPV, unsigned Opcode) {
// Extract the operand's type, we'll need it.
- const Type* OpTy = CPV->getType();
+ Type* OpTy = CPV->getType();
// Indicate whether to do the cast or not.
bool shouldCast = false;
@@ -1267,7 +1269,7 @@ std::string CWriter::GetValueName(const Value *Operand) {
void CWriter::writeInstComputationInline(Instruction &I) {
// We can't currently support integer types other than 1, 8, 16, 32, 64.
// Validate this.
- const Type *Ty = I.getType();
+ Type *Ty = I.getType();
if (Ty->isIntegerTy() && (Ty!=Type::getInt1Ty(I.getContext()) &&
Ty!=Type::getInt8Ty(I.getContext()) &&
Ty!=Type::getInt16Ty(I.getContext()) &&
@@ -1330,7 +1332,7 @@ void CWriter::writeOperand(Value *Operand, bool Static) {
// This function takes care of detecting that case and printing the cast
// for the Instruction.
bool CWriter::writeInstructionCast(const Instruction &I) {
- const Type *Ty = I.getOperand(0)->getType();
+ Type *Ty = I.getOperand(0)->getType();
switch (I.getOpcode()) {
case Instruction::Add:
case Instruction::Sub:
@@ -1362,7 +1364,7 @@ bool CWriter::writeInstructionCast(const Instruction &I) {
void CWriter::writeOperandWithCast(Value* Operand, unsigned Opcode) {
// Extract the operand's type, we'll need it.
- const Type* OpTy = Operand->getType();
+ Type* OpTy = Operand->getType();
// Indicate whether to do the cast or not.
bool shouldCast = false;
@@ -1430,7 +1432,7 @@ void CWriter::writeOperandWithCast(Value* Operand, const ICmpInst &Cmp) {
bool castIsSigned = Cmp.isSigned();
// If the operand was a pointer, convert to a large integer type.
- const Type* OpTy = Operand->getType();
+ Type* OpTy = Operand->getType();
if (OpTy->isPointerTy())
OpTy = TD->getIntPtrType(Operand->getContext());
@@ -1665,7 +1667,8 @@ bool CWriter::doInitialization(Module &M) {
TAsm = Match->createMCAsmInfo(Triple);
#endif
TAsm = new CBEMCAsmInfo();
- TCtx = new MCContext(*TAsm, NULL);
+ MRI = new MCRegisterInfo();
+ TCtx = new MCContext(*TAsm, *MRI, NULL);
Mang = new Mangler(*TCtx, *TD);
// Keep track of which functions are static ctors/dtors so they can have
@@ -2049,7 +2052,7 @@ void CWriter::printModuleTypes() {
for (unsigned i = 0, e = StructTypes.size(); i != e; ++i) {
StructType *ST = StructTypes[i];
- if (ST->isAnonymous() || ST->getName().empty())
+ if (ST->isLiteral() || ST->getName().empty())
UnnamedStructIDs[ST] = NextTypeID++;
std::string Name = getStructName(ST);
@@ -2060,7 +2063,7 @@ void CWriter::printModuleTypes() {
Out << '\n';
// Keep track of which structures have been printed so far.
- SmallPtrSet<const Type *, 16> StructPrinted;
+ SmallPtrSet<Type *, 16> StructPrinted;
// Loop over all structures then push them into the stack so they are
// printed in the correct order.
@@ -2077,8 +2080,8 @@ void CWriter::printModuleTypes() {
//
// TODO: Make this work properly with vector types
//
-void CWriter::printContainedStructs(const Type *Ty,
- SmallPtrSet<const Type *, 16> &StructPrinted) {
+void CWriter::printContainedStructs(Type *Ty,
+ SmallPtrSet<Type *, 16> &StructPrinted) {
// Don't walk through pointers.
if (Ty->isPointerTy() || Ty->isPrimitiveType() || Ty->isIntegerTy())
return;
@@ -2088,7 +2091,7 @@ void CWriter::printContainedStructs(const Type *Ty,
E = Ty->subtype_end(); I != E; ++I)
printContainedStructs(*I, StructPrinted);
- if (const StructType *ST = dyn_cast<StructType>(Ty)) {
+ if (StructType *ST = dyn_cast<StructType>(Ty)) {
// Check to see if we have already printed this struct.
if (!StructPrinted.insert(Ty)) return;
@@ -2120,7 +2123,7 @@ void CWriter::printFunctionSignature(const Function *F, bool Prototype) {
}
// Loop over the arguments, printing them...
- const FunctionType *FT = cast<FunctionType>(F->getFunctionType());
+ FunctionType *FT = cast<FunctionType>(F->getFunctionType());
const AttrListPtr &PAL = F->getAttributes();
std::string tstr;
@@ -2150,7 +2153,7 @@ void CWriter::printFunctionSignature(const Function *F, bool Prototype) {
ArgName = GetValueName(I);
else
ArgName = "";
- const Type *ArgTy = I->getType();
+ Type *ArgTy = I->getType();
if (PAL.paramHasAttr(Idx, Attribute::ByVal)) {
ArgTy = cast<PointerType>(ArgTy)->getElementType();
ByValParams.insert(I);
@@ -2177,7 +2180,7 @@ void CWriter::printFunctionSignature(const Function *F, bool Prototype) {
for (; I != E; ++I) {
if (PrintedArg) FunctionInnards << ", ";
- const Type *ArgTy = *I;
+ Type *ArgTy = *I;
if (PAL.paramHasAttr(Idx, Attribute::ByVal)) {
assert(ArgTy->isPointerTy());
ArgTy = cast<PointerType>(ArgTy)->getElementType();
@@ -2205,7 +2208,7 @@ void CWriter::printFunctionSignature(const Function *F, bool Prototype) {
FunctionInnards << ')';
// Get the return tpe for the function.
- const Type *RetTy;
+ Type *RetTy;
if (!isStructReturn)
RetTy = F->getReturnType();
else {
@@ -2222,8 +2225,8 @@ void CWriter::printFunctionSignature(const Function *F, bool Prototype) {
static inline bool isFPIntBitCast(const Instruction &I) {
if (!isa<BitCastInst>(I))
return false;
- const Type *SrcTy = I.getOperand(0)->getType();
- const Type *DstTy = I.getType();
+ Type *SrcTy = I.getOperand(0)->getType();
+ Type *DstTy = I.getType();
return (SrcTy->isFloatingPointTy() && DstTy->isIntegerTy()) ||
(DstTy->isFloatingPointTy() && SrcTy->isIntegerTy());
}
@@ -2237,7 +2240,7 @@ void CWriter::printFunction(Function &F) {
// If this is a struct return function, handle the result with magic.
if (isStructReturn) {
- const Type *StructTy =
+ Type *StructTy =
cast<PointerType>(F.arg_begin()->getType())->getElementType();
Out << " ";
printType(Out, StructTy, false, "StructReturn");
@@ -2380,22 +2383,29 @@ void CWriter::visitReturnInst(ReturnInst &I) {
void CWriter::visitSwitchInst(SwitchInst &SI) {
+ Value* Cond = SI.getCondition();
+
Out << " switch (";
- writeOperand(SI.getOperand(0));
+ writeOperand(Cond);
Out << ") {\n default:\n";
printPHICopiesForSuccessor (SI.getParent(), SI.getDefaultDest(), 2);
printBranchToBlock(SI.getParent(), SI.getDefaultDest(), 2);
Out << ";\n";
- for (unsigned i = 2, e = SI.getNumOperands(); i != e; i += 2) {
+
+ unsigned NumCases = SI.getNumCases();
+ // Skip the first item since that's the default case.
+ for (unsigned i = 1; i < NumCases; ++i) {
+ ConstantInt* CaseVal = SI.getCaseValue(i);
+ BasicBlock* Succ = SI.getSuccessor(i);
Out << " case ";
- writeOperand(SI.getOperand(i));
+ writeOperand(CaseVal);
Out << ":\n";
- BasicBlock *Succ = cast<BasicBlock>(SI.getOperand(i+1));
printPHICopiesForSuccessor (SI.getParent(), Succ, 2);
printBranchToBlock(SI.getParent(), Succ, 2);
if (Function::iterator(Succ) == llvm::next(Function::iterator(SI.getParent())))
Out << " break;\n";
}
+
Out << " }\n";
}
@@ -2656,7 +2666,7 @@ void CWriter::visitFCmpInst(FCmpInst &I) {
Out << ")";
}
-static const char * getFloatBitCastField(const Type *Ty) {
+static const char * getFloatBitCastField(Type *Ty) {
switch (Ty->getTypeID()) {
default: llvm_unreachable("Invalid Type");
case Type::FloatTyID: return "Float";
@@ -2672,8 +2682,8 @@ static const char * getFloatBitCastField(const Type *Ty) {
}
void CWriter::visitCastInst(CastInst &I) {
- const Type *DstTy = I.getType();
- const Type *SrcTy = I.getOperand(0)->getType();
+ Type *DstTy = I.getType();
+ Type *SrcTy = I.getOperand(0)->getType();
if (isFPIntBitCast(I)) {
Out << '(';
// These int<->float and long<->double casts need to be handled specially
@@ -2719,7 +2729,7 @@ void CWriter::visitSelectInst(SelectInst &I) {
// Returns the macro name or value of the max or min of an integer type
// (as defined in limits.h).
-static void printLimitValue(const IntegerType &Ty, bool isSigned, bool isMax,
+static void printLimitValue(IntegerType &Ty, bool isSigned, bool isMax,
raw_ostream &Out) {
const char* type;
const char* sprefix = "";
@@ -2745,16 +2755,16 @@ static void printLimitValue(const IntegerType &Ty, bool isSigned, bool isMax,
}
#ifndef NDEBUG
-static bool isSupportedIntegerSize(const IntegerType &T) {
+static bool isSupportedIntegerSize(IntegerType &T) {
return T.getBitWidth() == 8 || T.getBitWidth() == 16 ||
T.getBitWidth() == 32 || T.getBitWidth() == 64;
}
#endif
void CWriter::printIntrinsicDefinition(const Function &F, raw_ostream &Out) {
- const FunctionType *funT = F.getFunctionType();
- const Type *retT = F.getReturnType();
- const IntegerType *elemT = cast<IntegerType>(funT->getParamType(1));
+ FunctionType *funT = F.getFunctionType();
+ Type *retT = F.getReturnType();
+ IntegerType *elemT = cast<IntegerType>(funT->getParamType(1));
assert(isSupportedIntegerSize(*elemT) &&
"CBackend does not support arbitrary size integers.");
@@ -2829,7 +2839,6 @@ void CWriter::lowerIntrinsics(Function &F) {
if (Function *F = CI->getCalledFunction())
switch (F->getIntrinsicID()) {
case Intrinsic::not_intrinsic:
- case Intrinsic::memory_barrier:
case Intrinsic::vastart:
case Intrinsic::vacopy:
case Intrinsic::vaend:
@@ -2908,8 +2917,8 @@ void CWriter::visitCallInst(CallInst &I) {
Value *Callee = I.getCalledValue();
- const PointerType *PTy = cast<PointerType>(Callee->getType());
- const FunctionType *FTy = cast<FunctionType>(PTy->getElementType());
+ PointerType *PTy = cast<PointerType>(Callee->getType());
+ FunctionType *FTy = cast<FunctionType>(PTy->getElementType());
// If this is a call to a struct-return function, assign to the first
// parameter instead of passing it to the call.
@@ -3020,9 +3029,6 @@ bool CWriter::visitBuiltinCall(CallInst &I, Intrinsic::ID ID,
WroteCallee = true;
return false;
}
- case Intrinsic::memory_barrier:
- Out << "__sync_synchronize()";
- return true;
case Intrinsic::vastart:
Out << "0; ";
@@ -3217,7 +3223,7 @@ void CWriter::visitInlineAsm(CallInst &CI) {
std::vector<std::pair<Value*, int> > ResultVals;
if (CI.getType() == Type::getVoidTy(CI.getContext()))
;
- else if (const StructType *ST = dyn_cast<StructType>(CI.getType())) {
+ else if (StructType *ST = dyn_cast<StructType>(CI.getType())) {
for (unsigned i = 0, e = ST->getNumElements(); i != e; ++i)
ResultVals.push_back(std::make_pair(&CI, (int)i));
} else {
@@ -3348,7 +3354,7 @@ void CWriter::printGEPExpression(Value *Ptr, gep_type_iterator I,
// Find out if the last index is into a vector. If so, we have to print this
// specially. Since vectors can't have elements of indexable type, only the
// last index could possibly be of a vector element.
- const VectorType *LastIndexIsVector = 0;
+ VectorType *LastIndexIsVector = 0;
{
for (gep_type_iterator TmpI = I; TmpI != E; ++TmpI)
LastIndexIsVector = dyn_cast<VectorType>(*TmpI);
@@ -3421,7 +3427,7 @@ void CWriter::printGEPExpression(Value *Ptr, gep_type_iterator I,
Out << ")";
}
-void CWriter::writeMemoryAccess(Value *Operand, const Type *OperandType,
+void CWriter::writeMemoryAccess(Value *Operand, Type *OperandType,
bool IsVolatile, unsigned Alignment) {
bool IsUnaligned = Alignment &&
@@ -3463,7 +3469,7 @@ void CWriter::visitStoreInst(StoreInst &I) {
Out << " = ";
Value *Operand = I.getOperand(0);
Constant *BitMask = 0;
- if (const IntegerType* ITy = dyn_cast<IntegerType>(Operand->getType()))
+ if (IntegerType* ITy = dyn_cast<IntegerType>(Operand->getType()))
if (!ITy->isPowerOf2ByteWidth())
// We have a bit width that doesn't match an even power-of-2 byte
// size. Consequently we must & the value with the type's bit mask
@@ -3492,7 +3498,7 @@ void CWriter::visitVAArgInst(VAArgInst &I) {
}
void CWriter::visitInsertElementInst(InsertElementInst &I) {
- const Type *EltTy = I.getType()->getElementType();
+ Type *EltTy = I.getType()->getElementType();
writeOperand(I.getOperand(0));
Out << ";\n ";
Out << "((";
@@ -3507,7 +3513,7 @@ void CWriter::visitInsertElementInst(InsertElementInst &I) {
void CWriter::visitExtractElementInst(ExtractElementInst &I) {
// We know that our operand is not inlined.
Out << "((";
- const Type *EltTy =
+ Type *EltTy =
cast<VectorType>(I.getOperand(0)->getType())->getElementType();
printType(Out, PointerType::getUnqual(EltTy));
Out << ")(&" << GetValueName(I.getOperand(0)) << "))[";
@@ -3519,9 +3525,9 @@ void CWriter::visitShuffleVectorInst(ShuffleVectorInst &SVI) {
Out << "(";
printType(Out, SVI.getType());
Out << "){ ";
- const VectorType *VT = SVI.getType();
+ VectorType *VT = SVI.getType();
unsigned NumElts = VT->getNumElements();
- const Type *EltTy = VT->getElementType();
+ Type *EltTy = VT->getElementType();
for (unsigned i = 0; i != NumElts; ++i) {
if (i) Out << ", ";
@@ -3557,9 +3563,9 @@ void CWriter::visitInsertValueInst(InsertValueInst &IVI) {
Out << GetValueName(&IVI);
for (const unsigned *b = IVI.idx_begin(), *i = b, *e = IVI.idx_end();
i != e; ++i) {
- const Type *IndexedTy =
+ Type *IndexedTy =
ExtractValueInst::getIndexedType(IVI.getOperand(0)->getType(),
- ArrayRef<unsigned>(b, i+1));
+ makeArrayRef(b, i+1));
if (IndexedTy->isArrayTy())
Out << ".array[" << *i << "]";
else
@@ -3579,9 +3585,9 @@ void CWriter::visitExtractValueInst(ExtractValueInst &EVI) {
Out << GetValueName(EVI.getOperand(0));
for (const unsigned *b = EVI.idx_begin(), *i = b, *e = EVI.idx_end();
i != e; ++i) {
- const Type *IndexedTy =
+ Type *IndexedTy =
ExtractValueInst::getIndexedType(EVI.getOperand(0)->getType(),
- ArrayRef<unsigned>(b, i+1));
+ makeArrayRef(b, i+1));
if (IndexedTy->isArrayTy())
Out << ".array[" << *i << "]";
else
diff --git a/contrib/llvm/lib/Target/CBackend/CTargetMachine.h b/contrib/llvm/lib/Target/CBackend/CTargetMachine.h
index e64216b..4f1ca97 100644
--- a/contrib/llvm/lib/Target/CBackend/CTargetMachine.h
+++ b/contrib/llvm/lib/Target/CBackend/CTargetMachine.h
@@ -20,8 +20,9 @@
namespace llvm {
struct CTargetMachine : public TargetMachine {
- CTargetMachine(const Target &T, const std::string &TT,
- const std::string &CPU, const std::string &FS)
+ CTargetMachine(const Target &T, StringRef TT,
+ StringRef CPU, StringRef FS,
+ Reloc::Model RM, CodeModel::Model CM)
: TargetMachine(T, TT, CPU, FS) {}
virtual bool addPassesToEmitFile(PassManagerBase &PM,
diff --git a/contrib/llvm/lib/Target/CBackend/TargetInfo/CBackendTargetInfo.cpp b/contrib/llvm/lib/Target/CBackend/TargetInfo/CBackendTargetInfo.cpp
index f7e8ff2..e8274ff 100644
--- a/contrib/llvm/lib/Target/CBackend/TargetInfo/CBackendTargetInfo.cpp
+++ b/contrib/llvm/lib/Target/CBackend/TargetInfo/CBackendTargetInfo.cpp
@@ -9,7 +9,7 @@
#include "CTargetMachine.h"
#include "llvm/Module.h"
-#include "llvm/Target/TargetRegistry.h"
+#include "llvm/Support/TargetRegistry.h"
using namespace llvm;
Target llvm::TheCBackendTarget;
@@ -17,3 +17,5 @@ Target llvm::TheCBackendTarget;
extern "C" void LLVMInitializeCBackendTargetInfo() {
RegisterTarget<> X(TheCBackendTarget, "c", "C backend");
}
+
+extern "C" void LLVMInitializeCBackendTargetMC() {}
diff --git a/contrib/llvm/lib/Target/CellSPU/MCTargetDesc/CMakeLists.txt b/contrib/llvm/lib/Target/CellSPU/MCTargetDesc/CMakeLists.txt
deleted file mode 100644
index 85fb258..0000000
--- a/contrib/llvm/lib/Target/CellSPU/MCTargetDesc/CMakeLists.txt
+++ /dev/null
@@ -1,4 +0,0 @@
-add_llvm_library(LLVMCellSPUDesc
- SPUMCTargetDesc.cpp
- SPUMCAsmInfo.cpp
- )
diff --git a/contrib/llvm/lib/Target/CellSPU/MCTargetDesc/Makefile b/contrib/llvm/lib/Target/CellSPU/MCTargetDesc/Makefile
deleted file mode 100644
index 10d9a42..0000000
--- a/contrib/llvm/lib/Target/CellSPU/MCTargetDesc/Makefile
+++ /dev/null
@@ -1,16 +0,0 @@
-##===- lib/Target/CellSPU/TargetDesc/Makefile --------------*- Makefile -*-===##
-#
-# The LLVM Compiler Infrastructure
-#
-# This file is distributed under the University of Illinois Open Source
-# License. See LICENSE.TXT for details.
-#
-##===----------------------------------------------------------------------===##
-
-LEVEL = ../../../..
-LIBRARYNAME = LLVMCellSPUDesc
-
-# Hack: we need to include 'main' target directory to grab private headers
-CPP.Flags += -I$(PROJ_OBJ_DIR)/.. -I$(PROJ_SRC_DIR)/..
-
-include $(LEVEL)/Makefile.common
diff --git a/contrib/llvm/lib/Target/CellSPU/MCTargetDesc/SPUMCTargetDesc.cpp b/contrib/llvm/lib/Target/CellSPU/MCTargetDesc/SPUMCTargetDesc.cpp
index 26c5a4b..d5af2a8 100644
--- a/contrib/llvm/lib/Target/CellSPU/MCTargetDesc/SPUMCTargetDesc.cpp
+++ b/contrib/llvm/lib/Target/CellSPU/MCTargetDesc/SPUMCTargetDesc.cpp
@@ -13,10 +13,12 @@
#include "SPUMCTargetDesc.h"
#include "SPUMCAsmInfo.h"
+#include "llvm/MC/MachineLocation.h"
+#include "llvm/MC/MCCodeGenInfo.h"
#include "llvm/MC/MCInstrInfo.h"
#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/MC/MCSubtargetInfo.h"
-#include "llvm/Target/TargetRegistry.h"
+#include "llvm/Support/TargetRegistry.h"
#define GET_INSTRINFO_MC_DESC
#include "SPUGenInstrInfo.inc"
@@ -35,8 +37,10 @@ static MCInstrInfo *createSPUMCInstrInfo() {
return X;
}
-extern "C" void LLVMInitializeCellSPUMCInstrInfo() {
- TargetRegistry::RegisterMCInstrInfo(TheCellSPUTarget, createSPUMCInstrInfo);
+static MCRegisterInfo *createCellSPUMCRegisterInfo(StringRef TT) {
+ MCRegisterInfo *X = new MCRegisterInfo();
+ InitSPUMCRegisterInfo(X, SPU::R0);
+ return X;
}
static MCSubtargetInfo *createSPUMCSubtargetInfo(StringRef TT, StringRef CPU,
@@ -46,11 +50,43 @@ static MCSubtargetInfo *createSPUMCSubtargetInfo(StringRef TT, StringRef CPU,
return X;
}
-extern "C" void LLVMInitializeCellSPUMCSubtargetInfo() {
- TargetRegistry::RegisterMCSubtargetInfo(TheCellSPUTarget,
- createSPUMCSubtargetInfo);
+static MCAsmInfo *createSPUMCAsmInfo(const Target &T, StringRef TT) {
+ MCAsmInfo *MAI = new SPULinuxMCAsmInfo(T, TT);
+
+ // Initial state of the frame pointer is R1.
+ MachineLocation Dst(MachineLocation::VirtualFP);
+ MachineLocation Src(SPU::R1, 0);
+ MAI->addInitialFrameState(0, Dst, Src);
+
+ return MAI;
+}
+
+static MCCodeGenInfo *createSPUMCCodeGenInfo(StringRef TT, Reloc::Model RM,
+ CodeModel::Model CM) {
+ MCCodeGenInfo *X = new MCCodeGenInfo();
+ // For the time being, use static relocations, since there's really no
+ // support for PIC yet.
+ X->InitMCCodeGenInfo(Reloc::Static, CM);
+ return X;
}
-extern "C" void LLVMInitializeCellSPUMCAsmInfo() {
- RegisterMCAsmInfo<SPULinuxMCAsmInfo> X(TheCellSPUTarget);
+// Force static initialization.
+extern "C" void LLVMInitializeCellSPUTargetMC() {
+ // Register the MC asm info.
+ RegisterMCAsmInfoFn X(TheCellSPUTarget, createSPUMCAsmInfo);
+
+ // Register the MC codegen info.
+ TargetRegistry::RegisterMCCodeGenInfo(TheCellSPUTarget,
+ createSPUMCCodeGenInfo);
+
+ // Register the MC instruction info.
+ TargetRegistry::RegisterMCInstrInfo(TheCellSPUTarget, createSPUMCInstrInfo);
+
+ // Register the MC register info.
+ TargetRegistry::RegisterMCRegInfo(TheCellSPUTarget,
+ createCellSPUMCRegisterInfo);
+
+ // Register the MC subtarget info.
+ TargetRegistry::RegisterMCSubtargetInfo(TheCellSPUTarget,
+ createSPUMCSubtargetInfo);
}
diff --git a/contrib/llvm/lib/Target/CellSPU/MCTargetDesc/SPUMCTargetDesc.h b/contrib/llvm/lib/Target/CellSPU/MCTargetDesc/SPUMCTargetDesc.h
index c5c037d..a3717b0 100644
--- a/contrib/llvm/lib/Target/CellSPU/MCTargetDesc/SPUMCTargetDesc.h
+++ b/contrib/llvm/lib/Target/CellSPU/MCTargetDesc/SPUMCTargetDesc.h
@@ -1,4 +1,4 @@
-//===-- SPUMCTargetDesc.h - Alpha Target Descriptions ---------*- C++ -*-===//
+//===-- SPUMCTargetDesc.h - CellSPU Target Descriptions ---------*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
@@ -7,7 +7,7 @@
//
//===----------------------------------------------------------------------===//
//
-// This file provides Alpha specific target descriptions.
+// This file provides CellSPU specific target descriptions.
//
//===----------------------------------------------------------------------===//
diff --git a/contrib/llvm/lib/Target/CellSPU/SPUAsmPrinter.cpp b/contrib/llvm/lib/Target/CellSPU/SPUAsmPrinter.cpp
index fd96694..90b5270 100644
--- a/contrib/llvm/lib/Target/CellSPU/SPUAsmPrinter.cpp
+++ b/contrib/llvm/lib/Target/CellSPU/SPUAsmPrinter.cpp
@@ -29,10 +29,10 @@
#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetOptions.h"
#include "llvm/Target/TargetRegisterInfo.h"
-#include "llvm/Target/TargetRegistry.h"
#include "llvm/ADT/SmallString.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/TargetRegistry.h"
#include "llvm/Support/raw_ostream.h"
using namespace llvm;
diff --git a/contrib/llvm/lib/Target/CellSPU/SPUFrameLowering.cpp b/contrib/llvm/lib/Target/CellSPU/SPUFrameLowering.cpp
index a3e7e73..093f99f 100644
--- a/contrib/llvm/lib/Target/CellSPU/SPUFrameLowering.cpp
+++ b/contrib/llvm/lib/Target/CellSPU/SPUFrameLowering.cpp
@@ -181,18 +181,6 @@ void SPUFrameLowering::emitPrologue(MachineFunction &MF) const {
MachineLocation FPSrc(MachineLocation::VirtualFP);
Moves.push_back(MachineMove(ReadyLabel, FPDst, FPSrc));
}
- } else {
- // This is a leaf function -- insert a branch hint iff there are
- // sufficient number instructions in the basic block. Note that
- // this is just a best guess based on the basic block's size.
- if (MBB.size() >= (unsigned) SPUFrameLowering::branchHintPenalty()) {
- MachineBasicBlock::iterator MBBI = MBB.getLastNonDebugInstr();
- dl = MBBI->getDebugLoc();
-
- // Insert terminator label
- BuildMI(MBB, MBBI, dl, TII.get(SPU::PROLOG_LABEL))
- .addSym(MMI.getContext().CreateTempSymbol());
- }
}
}
@@ -249,14 +237,6 @@ void SPUFrameLowering::emitEpilogue(MachineFunction &MF,
}
}
-void SPUFrameLowering::getInitialFrameState(std::vector<MachineMove> &Moves)
- const {
- // Initial state of the frame pointer is R1.
- MachineLocation Dst(MachineLocation::VirtualFP);
- MachineLocation Src(SPU::R1, 0);
- Moves.push_back(MachineMove(0, Dst, Src));
-}
-
void SPUFrameLowering::processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
RegScavenger *RS) const{
// Mark LR and SP unused, since the prolog spills them to stack and
diff --git a/contrib/llvm/lib/Target/CellSPU/SPUFrameLowering.h b/contrib/llvm/lib/Target/CellSPU/SPUFrameLowering.h
index 4fee72d..b837f2c 100644
--- a/contrib/llvm/lib/Target/CellSPU/SPUFrameLowering.h
+++ b/contrib/llvm/lib/Target/CellSPU/SPUFrameLowering.h
@@ -43,9 +43,6 @@ namespace llvm {
void processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
RegScavenger *RS = NULL) const;
- //! Perform target-specific stack frame setup.
- void getInitialFrameState(std::vector<MachineMove> &Moves) const;
-
//! Return a function's saved spill slots
/*!
For CellSPU, a function's saved spill slots is just the link register.
@@ -77,17 +74,6 @@ namespace llvm {
static int FItoStackOffset(int frame_index) {
return frame_index * stackSlotSize();
}
- //! Number of instructions required to overcome hint-for-branch latency
- /*!
- HBR (hint-for-branch) instructions can be inserted when, for example,
- we know that a given function is going to be called, such as printf(),
- in the control flow graph. HBRs are only inserted if a sufficient number
- of instructions occurs between the HBR and the target. Currently, HBRs
- take 6 cycles, ergo, the magic number 6.
- */
- static int branchHintPenalty() {
- return 6;
- }
};
}
diff --git a/contrib/llvm/lib/Target/CellSPU/SPUISelLowering.cpp b/contrib/llvm/lib/Target/CellSPU/SPUISelLowering.cpp
index f0ceee2..ac33111 100644
--- a/contrib/llvm/lib/Target/CellSPU/SPUISelLowering.cpp
+++ b/contrib/llvm/lib/Target/CellSPU/SPUISelLowering.cpp
@@ -69,7 +69,7 @@ namespace {
TargetLowering::ArgListEntry Entry;
for (unsigned i = 0, e = Op.getNumOperands(); i != e; ++i) {
EVT ArgVT = Op.getOperand(i).getValueType();
- const Type *ArgTy = ArgVT.getTypeForEVT(*DAG.getContext());
+ Type *ArgTy = ArgVT.getTypeForEVT(*DAG.getContext());
Entry.Node = Op.getOperand(i);
Entry.Ty = ArgTy;
Entry.isSExt = isSigned;
@@ -80,7 +80,7 @@ namespace {
TLI.getPointerTy());
// Splice the libcall in wherever FindInputOutputChains tells us to.
- const Type *RetTy =
+ Type *RetTy =
Op.getNode()->getValueType(0).getTypeForEVT(*DAG.getContext());
std::pair<SDValue, SDValue> CallInfo =
TLI.LowerCallTo(InChain, RetTy, isSigned, !isSigned, false, false,
@@ -174,6 +174,7 @@ SPUTargetLowering::SPUTargetLowering(SPUTargetMachine &TM)
// SPU has no intrinsics for these particular operations:
setOperationAction(ISD::MEMBARRIER, MVT::Other, Expand);
+ setOperationAction(ISD::ATOMIC_FENCE, MVT::Other, Expand);
// SPU has no division/remainder instructions
setOperationAction(ISD::SREM, MVT::i8, Expand);
@@ -401,6 +402,9 @@ SPUTargetLowering::SPUTargetLowering(SPUTargetMachine &TM)
i <= (unsigned)MVT::LAST_VECTOR_VALUETYPE; ++i) {
MVT::SimpleValueType VT = (MVT::SimpleValueType)i;
+ // Set operation actions to legal types only.
+ if (!isTypeLegal(VT)) continue;
+
// add/sub are legal for all supported vector VT's.
setOperationAction(ISD::ADD, VT, Legal);
setOperationAction(ISD::SUB, VT, Legal);
@@ -438,6 +442,7 @@ SPUTargetLowering::SPUTargetLowering(SPUTargetMachine &TM)
setOperationAction(ISD::FDIV, MVT::v4f32, Legal);
setBooleanContents(ZeroOrNegativeOneBooleanContent);
+ setBooleanVectorContents(ZeroOrNegativeOneBooleanContent); // FIXME: Is this correct?
setStackPointerRegisterToSaveRestore(SPU::R1);
@@ -497,7 +502,7 @@ SPUTargetLowering::getTargetNodeName(unsigned Opcode) const
// Return the Cell SPU's SETCC result type
//===----------------------------------------------------------------------===//
-MVT::SimpleValueType SPUTargetLowering::getSetCCResultType(EVT VT) const {
+EVT SPUTargetLowering::getSetCCResultType(EVT VT) const {
// i8, i16 and i32 are valid SETCC result types
MVT::SimpleValueType retval;
@@ -2727,6 +2732,7 @@ static SDValue LowerSIGN_EXTEND(SDValue Op, SelectionDAG &DAG)
// the type to extend from needs to be i64 or i32.
assert((OpVT == MVT::i128 && (Op0VT == MVT::i64 || Op0VT == MVT::i32)) &&
"LowerSIGN_EXTEND: input and/or output operand have wrong size");
+ (void)OpVT;
// Create shuffle mask
unsigned mask1 = 0x10101010; // byte 0 - 3 and 4 - 7
@@ -3216,7 +3222,7 @@ SPUTargetLowering::LowerAsmOperandForConstraint(SDValue Op,
/// isLegalAddressImmediate - Return true if the integer value can be used
/// as the offset of the target addressing mode.
bool SPUTargetLowering::isLegalAddressImmediate(int64_t V,
- const Type *Ty) const {
+ Type *Ty) const {
// SPU's addresses are 256K:
return (V > -(1 << 18) && V < (1 << 18) - 1);
}
@@ -3239,7 +3245,7 @@ bool SPUTargetLowering::isLegalICmpImmediate(int64_t Imm) const {
bool
SPUTargetLowering::isLegalAddressingMode(const AddrMode &AM,
- const Type * ) const{
+ Type * ) const{
// A-form: 18bit absolute address.
if (AM.BaseGV && !AM.HasBaseReg && AM.Scale == 0 && AM.BaseOffs == 0)
diff --git a/contrib/llvm/lib/Target/CellSPU/SPUISelLowering.h b/contrib/llvm/lib/Target/CellSPU/SPUISelLowering.h
index d23f6cc..aa4a168 100644
--- a/contrib/llvm/lib/Target/CellSPU/SPUISelLowering.h
+++ b/contrib/llvm/lib/Target/CellSPU/SPUISelLowering.h
@@ -107,7 +107,7 @@ namespace llvm {
virtual const char *getTargetNodeName(unsigned Opcode) const;
/// getSetCCResultType - Return the ValueType for ISD::SETCC
- virtual MVT::SimpleValueType getSetCCResultType(EVT VT) const;
+ virtual EVT getSetCCResultType(EVT VT) const;
virtual MVT getShiftAmountTy(EVT LHSTy) const { return MVT::i32; }
@@ -147,7 +147,7 @@ namespace llvm {
/// isLegalAddressImmediate - Return true if the integer value can be used
/// as the offset of the target addressing mode.
- virtual bool isLegalAddressImmediate(int64_t V, const Type *Ty) const;
+ virtual bool isLegalAddressImmediate(int64_t V, Type *Ty) const;
virtual bool isLegalAddressImmediate(GlobalValue *) const;
virtual bool isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const;
@@ -179,7 +179,7 @@ namespace llvm {
virtual bool isLegalICmpImmediate(int64_t Imm) const;
virtual bool isLegalAddressingMode(const AddrMode &AM,
- const Type *Ty) const;
+ Type *Ty) const;
};
}
diff --git a/contrib/llvm/lib/Target/CellSPU/SPUInstrInfo.cpp b/contrib/llvm/lib/Target/CellSPU/SPUInstrInfo.cpp
index e67b10c..007bc0e 100644
--- a/contrib/llvm/lib/Target/CellSPU/SPUInstrInfo.cpp
+++ b/contrib/llvm/lib/Target/CellSPU/SPUInstrInfo.cpp
@@ -17,9 +17,9 @@
#include "SPUHazardRecognizers.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/MC/MCContext.h"
-#include "llvm/Target/TargetRegistry.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/TargetRegistry.h"
#include "llvm/Support/raw_ostream.h"
#define GET_INSTRINFO_CTOR
@@ -290,6 +290,8 @@ static void removeHBR( MachineBasicBlock &MBB) {
if (I->getOpcode() == SPU::HBRA ||
I->getOpcode() == SPU::HBR_LABEL){
I=MBB.erase(I);
+ if (I == MBB.end())
+ break;
}
}
}
diff --git a/contrib/llvm/lib/Target/CellSPU/SPUInstrInfo.td b/contrib/llvm/lib/Target/CellSPU/SPUInstrInfo.td
index e103c9b..f76ebd7 100644
--- a/contrib/llvm/lib/Target/CellSPU/SPUInstrInfo.td
+++ b/contrib/llvm/lib/Target/CellSPU/SPUInstrInfo.td
@@ -1594,8 +1594,8 @@ multiclass BitwiseOrImm
{
def v4i32: ORIVecInst<v4i32, v4i32Uns10Imm>;
- def r32: ORIInst<(outs R32C:$rT), (ins R32C:$rA, u10imm_i32:$val),
- [(set R32C:$rT, (or R32C:$rA, i32ImmUns10:$val))]>;
+ def r32: ORIInst<(outs R32C:$rT), (ins R32C:$rA, s10imm_i32:$val),
+ [(set R32C:$rT, (or R32C:$rA, i32ImmSExt10:$val))]>;
// i16i32: hacked version of the ori instruction to extend 16-bit quantities
// to 32-bit quantities. used exclusively to match "anyext" conversions (vide
@@ -3467,8 +3467,10 @@ let isBranch = 1, isTerminator = 1, hasCtrlDep = 1 in {
[/* no pattern */]>;
// Indirect branch
- def BI:
- BIForm<0b00010101100, "bi\t$func", [(brind R32C:$func)]>;
+ let isIndirectBranch = 1 in {
+ def BI:
+ BIForm<0b00010101100, "bi\t$func", [(brind R32C:$func)]>;
+ }
}
// Conditional branches:
diff --git a/contrib/llvm/lib/Target/CellSPU/SPURegisterInfo.cpp b/contrib/llvm/lib/Target/CellSPU/SPURegisterInfo.cpp
index 19896c0..bbac6fd 100644
--- a/contrib/llvm/lib/Target/CellSPU/SPURegisterInfo.cpp
+++ b/contrib/llvm/lib/Target/CellSPU/SPURegisterInfo.cpp
@@ -25,7 +25,6 @@
#include "llvm/CodeGen/MachineModuleInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
-#include "llvm/CodeGen/MachineLocation.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/RegisterScavenging.h"
#include "llvm/CodeGen/ValueTypes.h"
@@ -187,7 +186,7 @@ unsigned SPURegisterInfo::getRegisterNumbering(unsigned RegEnum) {
SPURegisterInfo::SPURegisterInfo(const SPUSubtarget &subtarget,
const TargetInstrInfo &tii) :
- SPUGenRegisterInfo(), Subtarget(subtarget), TII(tii)
+ SPUGenRegisterInfo(SPU::R0), Subtarget(subtarget), TII(tii)
{
}
@@ -311,28 +310,12 @@ SPURegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, int SPAdj,
}
unsigned
-SPURegisterInfo::getRARegister() const
-{
- return SPU::R0;
-}
-
-unsigned
SPURegisterInfo::getFrameRegister(const MachineFunction &MF) const
{
return SPU::R1;
}
int
-SPURegisterInfo::getDwarfRegNum(unsigned RegNum, bool isEH) const {
- // FIXME: Most probably dwarf numbers differs for Linux and Darwin
- return SPUGenRegisterInfo::getDwarfRegNumFull(RegNum, 0);
-}
-
-int SPURegisterInfo::getLLVMRegNum(unsigned RegNum, bool isEH) const {
- return SPUGenRegisterInfo::getLLVMRegNumFull(RegNum, 0);
-}
-
-int
SPURegisterInfo::convertDFormToXForm(int dFormOpcode) const
{
switch(dFormOpcode)
diff --git a/contrib/llvm/lib/Target/CellSPU/SPURegisterInfo.h b/contrib/llvm/lib/Target/CellSPU/SPURegisterInfo.h
index 5e014f8..b7818a4 100644
--- a/contrib/llvm/lib/Target/CellSPU/SPURegisterInfo.h
+++ b/contrib/llvm/lib/Target/CellSPU/SPURegisterInfo.h
@@ -74,8 +74,6 @@ namespace llvm {
void eliminateFrameIndex(MachineBasicBlock::iterator II, int SPAdj,
RegScavenger *RS = NULL) const;
- //! Get return address register (LR, aka R0)
- unsigned getRARegister() const;
//! Get the stack frame register (SP, aka R1)
unsigned getFrameRegister(const MachineFunction &MF) const;
@@ -83,10 +81,6 @@ namespace llvm {
// New methods added:
//------------------------------------------------------------------------
- //! Get DWARF debugging register number
- int getDwarfRegNum(unsigned RegNum, bool isEH) const;
- int getLLVMRegNum(unsigned RegNum, bool isEH) const;
-
//! Convert D-form load/store to X-form load/store
/*!
Converts a regiser displacement load/store into a register-indexed
diff --git a/contrib/llvm/lib/Target/CellSPU/SPUSubtarget.cpp b/contrib/llvm/lib/Target/CellSPU/SPUSubtarget.cpp
index 856dc82..43335ab 100644
--- a/contrib/llvm/lib/Target/CellSPU/SPUSubtarget.cpp
+++ b/contrib/llvm/lib/Target/CellSPU/SPUSubtarget.cpp
@@ -14,7 +14,7 @@
#include "SPUSubtarget.h"
#include "SPU.h"
#include "SPURegisterInfo.h"
-#include "llvm/Target/TargetRegistry.h"
+#include "llvm/Support/TargetRegistry.h"
#include "llvm/ADT/SmallVector.h"
#define GET_SUBTARGETINFO_TARGET_DESC
diff --git a/contrib/llvm/lib/Target/CellSPU/SPUTargetMachine.cpp b/contrib/llvm/lib/Target/CellSPU/SPUTargetMachine.cpp
index 3542a2b..93a7f6e 100644
--- a/contrib/llvm/lib/Target/CellSPU/SPUTargetMachine.cpp
+++ b/contrib/llvm/lib/Target/CellSPU/SPUTargetMachine.cpp
@@ -16,7 +16,8 @@
#include "llvm/PassManager.h"
#include "llvm/CodeGen/RegAllocRegistry.h"
#include "llvm/CodeGen/SchedulerRegistry.h"
-#include "llvm/Target/TargetRegistry.h"
+#include "llvm/Support/DynamicLibrary.h"
+#include "llvm/Support/TargetRegistry.h"
using namespace llvm;
@@ -31,9 +32,10 @@ SPUFrameLowering::getCalleeSaveSpillSlots(unsigned &NumEntries) const {
return &LR[0];
}
-SPUTargetMachine::SPUTargetMachine(const Target &T, const std::string &TT,
- const std::string &CPU,const std::string &FS)
- : LLVMTargetMachine(T, TT, CPU, FS),
+SPUTargetMachine::SPUTargetMachine(const Target &T, StringRef TT,
+ StringRef CPU, StringRef FS,
+ Reloc::Model RM, CodeModel::Model CM)
+ : LLVMTargetMachine(T, TT, CPU, FS, RM, CM),
Subtarget(TT, CPU, FS),
DataLayout(Subtarget.getTargetDataString()),
InstrInfo(*this),
@@ -41,9 +43,6 @@ SPUTargetMachine::SPUTargetMachine(const Target &T, const std::string &TT,
TLInfo(*this),
TSInfo(*this),
InstrItins(Subtarget.getInstrItineraryData()) {
- // For the time being, use static relocations, since there's really no
- // support for PIC yet.
- setRelocationModel(Reloc::Static);
}
//===----------------------------------------------------------------------===//
@@ -59,8 +58,16 @@ bool SPUTargetMachine::addInstSelector(PassManagerBase &PM,
// passes to run just before printing the assembly
bool SPUTargetMachine::
-addPreEmitPass(PassManagerBase &PM, CodeGenOpt::Level OptLevel)
-{
+addPreEmitPass(PassManagerBase &PM, CodeGenOpt::Level OptLevel) {
+ // load the TCE instruction scheduler, if available via
+ // loaded plugins
+ typedef llvm::FunctionPass* (*BuilderFunc)(const char*);
+ BuilderFunc schedulerCreator =
+ (BuilderFunc)(intptr_t)sys::DynamicLibrary::SearchForAddressOfSymbol(
+ "createTCESchedulerPass");
+ if (schedulerCreator != NULL)
+ PM.add(schedulerCreator("cellspu"));
+
//align instructions with nops/lnops for dual issue
PM.add(createSPUNopFillerPass(*this));
return true;
diff --git a/contrib/llvm/lib/Target/CellSPU/SPUTargetMachine.h b/contrib/llvm/lib/Target/CellSPU/SPUTargetMachine.h
index d96f86d..fffe77c 100644
--- a/contrib/llvm/lib/Target/CellSPU/SPUTargetMachine.h
+++ b/contrib/llvm/lib/Target/CellSPU/SPUTargetMachine.h
@@ -38,8 +38,9 @@ class SPUTargetMachine : public LLVMTargetMachine {
SPUSelectionDAGInfo TSInfo;
InstrItineraryData InstrItins;
public:
- SPUTargetMachine(const Target &T, const std::string &TT,
- const std::string &CPU, const std::string &FS);
+ SPUTargetMachine(const Target &T, StringRef TT,
+ StringRef CPU, StringRef FS,
+ Reloc::Model RM, CodeModel::Model CM);
/// Return the subtarget implementation object
virtual const SPUSubtarget *getSubtargetImpl() const {
diff --git a/contrib/llvm/lib/Target/CellSPU/TargetInfo/CellSPUTargetInfo.cpp b/contrib/llvm/lib/Target/CellSPU/TargetInfo/CellSPUTargetInfo.cpp
index 049ea23..84aadfa 100644
--- a/contrib/llvm/lib/Target/CellSPU/TargetInfo/CellSPUTargetInfo.cpp
+++ b/contrib/llvm/lib/Target/CellSPU/TargetInfo/CellSPUTargetInfo.cpp
@@ -9,7 +9,7 @@
#include "SPU.h"
#include "llvm/Module.h"
-#include "llvm/Target/TargetRegistry.h"
+#include "llvm/Support/TargetRegistry.h"
using namespace llvm;
Target llvm::TheCellSPUTarget;
diff --git a/contrib/llvm/lib/Target/CppBackend/CPPBackend.cpp b/contrib/llvm/lib/Target/CppBackend/CPPBackend.cpp
index 10d18f6..ae0e3c4 100644
--- a/contrib/llvm/lib/Target/CppBackend/CPPBackend.cpp
+++ b/contrib/llvm/lib/Target/CppBackend/CPPBackend.cpp
@@ -29,7 +29,7 @@
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/FormattedStream.h"
-#include "llvm/Target/TargetRegistry.h"
+#include "llvm/Support/TargetRegistry.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/Config/config.h"
#include <algorithm>
@@ -77,22 +77,12 @@ extern "C" void LLVMInitializeCppBackendTarget() {
RegisterTargetMachine<CPPTargetMachine> X(TheCppBackendTarget);
}
-extern "C" void LLVMInitializeCppBackendMCAsmInfo() {}
-
-extern "C" void LLVMInitializeCppBackendMCInstrInfo() {
- RegisterMCInstrInfo<MCInstrInfo> X(TheCppBackendTarget);
-}
-
-extern "C" void LLVMInitializeCppBackendMCSubtargetInfo() {
- RegisterMCSubtargetInfo<MCSubtargetInfo> X(TheCppBackendTarget);
-}
-
namespace {
- typedef std::vector<const Type*> TypeList;
- typedef std::map<const Type*,std::string> TypeMap;
+ typedef std::vector<Type*> TypeList;
+ typedef std::map<Type*,std::string> TypeMap;
typedef std::map<const Value*,std::string> ValueMap;
typedef std::set<std::string> NameSet;
- typedef std::set<const Type*> TypeSet;
+ typedef std::set<Type*> TypeSet;
typedef std::set<const Value*> ValueSet;
typedef std::map<const Value*,std::string> ForwardRefMap;
@@ -143,14 +133,14 @@ namespace {
void printEscapedString(const std::string& str);
void printCFP(const ConstantFP* CFP);
- std::string getCppName(const Type* val);
- inline void printCppName(const Type* val);
+ std::string getCppName(Type* val);
+ inline void printCppName(Type* val);
std::string getCppName(const Value* val);
inline void printCppName(const Value* val);
void printAttributes(const AttrListPtr &PAL, const std::string &name);
- void printType(const Type* Ty);
+ void printType(Type* Ty);
void printTypes(const Module* M);
void printConstant(const Constant *CPV);
@@ -164,7 +154,7 @@ namespace {
void printFunctionHead(const Function *F);
void printFunctionBody(const Function *F);
void printInstruction(const Instruction *I, const std::string& bbname);
- std::string getOpName(Value*);
+ std::string getOpName(const Value*);
void printModuleBody();
};
@@ -184,7 +174,7 @@ static inline void sanitize(std::string &str) {
str[i] = '_';
}
-static std::string getTypePrefix(const Type *Ty) {
+static std::string getTypePrefix(Type *Ty) {
switch (Ty->getTypeID()) {
case Type::VoidTyID: return "void_";
case Type::IntegerTyID:
@@ -339,7 +329,7 @@ void CppWriter::printEscapedString(const std::string &Str) {
}
}
-std::string CppWriter::getCppName(const Type* Ty) {
+std::string CppWriter::getCppName(Type* Ty) {
// First, handle the primitive types .. easy
if (Ty->isPrimitiveType() || Ty->isIntegerTy()) {
switch (Ty->getTypeID()) {
@@ -379,7 +369,7 @@ std::string CppWriter::getCppName(const Type* Ty) {
// See if the type has a name in the symboltable and build accordingly
std::string name;
- if (const StructType *STy = dyn_cast<StructType>(Ty))
+ if (StructType *STy = dyn_cast<StructType>(Ty))
if (STy->hasName())
name = STy->getName();
@@ -393,7 +383,7 @@ std::string CppWriter::getCppName(const Type* Ty) {
return TypeNames[Ty] = name;
}
-void CppWriter::printCppName(const Type* Ty) {
+void CppWriter::printCppName(Type* Ty) {
printEscapedString(getCppName(Ty));
}
@@ -480,6 +470,9 @@ void CppWriter::printAttributes(const AttrListPtr &PAL,
HANDLE_ATTR(NoImplicitFloat);
HANDLE_ATTR(Naked);
HANDLE_ATTR(InlineHint);
+ HANDLE_ATTR(ReturnsTwice);
+ HANDLE_ATTR(UWTable);
+ HANDLE_ATTR(NonLazyBind);
#undef HANDLE_ATTR
if (attrs & Attribute::StackAlignment)
Out << " | Attribute::constructStackAlignmentFromInt("
@@ -499,7 +492,7 @@ void CppWriter::printAttributes(const AttrListPtr &PAL,
}
}
-void CppWriter::printType(const Type* Ty) {
+void CppWriter::printType(Type* Ty) {
// We don't print definitions for primitive types
if (Ty->isPrimitiveType() || Ty->isIntegerTy())
return;
@@ -514,13 +507,13 @@ void CppWriter::printType(const Type* Ty) {
// Print the type definition
switch (Ty->getTypeID()) {
case Type::FunctionTyID: {
- const FunctionType* FT = cast<FunctionType>(Ty);
+ FunctionType* FT = cast<FunctionType>(Ty);
Out << "std::vector<Type*>" << typeName << "_args;";
nl(Out);
FunctionType::param_iterator PI = FT->param_begin();
FunctionType::param_iterator PE = FT->param_end();
for (; PI != PE; ++PI) {
- const Type* argTy = static_cast<const Type*>(*PI);
+ Type* argTy = static_cast<Type*>(*PI);
printType(argTy);
std::string argName(getCppName(argTy));
Out << typeName << "_args.push_back(" << argName;
@@ -539,13 +532,21 @@ void CppWriter::printType(const Type* Ty) {
break;
}
case Type::StructTyID: {
- const StructType* ST = cast<StructType>(Ty);
- if (!ST->isAnonymous()) {
- Out << "StructType *" << typeName << " = ";
- Out << "StructType::createNamed(mod->getContext(), \"";
+ StructType* ST = cast<StructType>(Ty);
+ if (!ST->isLiteral()) {
+ Out << "StructType *" << typeName << " = mod->getTypeByName(\"";
+ printEscapedString(ST->getName());
+ Out << "\");";
+ nl(Out);
+ Out << "if (!" << typeName << ") {";
+ nl(Out);
+ Out << typeName << " = ";
+ Out << "StructType::create(mod->getContext(), \"";
printEscapedString(ST->getName());
Out << "\");";
nl(Out);
+ Out << "}";
+ nl(Out);
// Indicate that this type is now defined.
DefinedTypes.insert(Ty);
}
@@ -555,7 +556,7 @@ void CppWriter::printType(const Type* Ty) {
StructType::element_iterator EI = ST->element_begin();
StructType::element_iterator EE = ST->element_end();
for (; EI != EE; ++EI) {
- const Type* fieldTy = static_cast<const Type*>(*EI);
+ Type* fieldTy = static_cast<Type*>(*EI);
printType(fieldTy);
std::string fieldName(getCppName(fieldTy));
Out << typeName << "_fields.push_back(" << fieldName;
@@ -563,21 +564,27 @@ void CppWriter::printType(const Type* Ty) {
nl(Out);
}
- if (ST->isAnonymous()) {
+ if (ST->isLiteral()) {
Out << "StructType *" << typeName << " = ";
Out << "StructType::get(" << "mod->getContext(), ";
} else {
+ Out << "if (" << typeName << "->isOpaque()) {";
+ nl(Out);
Out << typeName << "->setBody(";
}
Out << typeName << "_fields, /*isPacked=*/"
<< (ST->isPacked() ? "true" : "false") << ");";
nl(Out);
+ if (!ST->isLiteral()) {
+ Out << "}";
+ nl(Out);
+ }
break;
}
case Type::ArrayTyID: {
- const ArrayType* AT = cast<ArrayType>(Ty);
- const Type* ET = AT->getElementType();
+ ArrayType* AT = cast<ArrayType>(Ty);
+ Type* ET = AT->getElementType();
printType(ET);
if (DefinedTypes.find(Ty) == DefinedTypes.end()) {
std::string elemName(getCppName(ET));
@@ -589,8 +596,8 @@ void CppWriter::printType(const Type* Ty) {
break;
}
case Type::PointerTyID: {
- const PointerType* PT = cast<PointerType>(Ty);
- const Type* ET = PT->getElementType();
+ PointerType* PT = cast<PointerType>(Ty);
+ Type* ET = PT->getElementType();
printType(ET);
if (DefinedTypes.find(Ty) == DefinedTypes.end()) {
std::string elemName(getCppName(ET));
@@ -602,8 +609,8 @@ void CppWriter::printType(const Type* Ty) {
break;
}
case Type::VectorTyID: {
- const VectorType* PT = cast<VectorType>(Ty);
- const Type* ET = PT->getElementType();
+ VectorType* PT = cast<VectorType>(Ty);
+ Type* ET = PT->getElementType();
printType(ET);
if (DefinedTypes.find(Ty) == DefinedTypes.end()) {
std::string elemName(getCppName(ET));
@@ -766,9 +773,7 @@ void CppWriter::printConstant(const Constant *CV) {
Out << "Constant* " << constName
<< " = ConstantExpr::getGetElementPtr("
<< getCppName(CE->getOperand(0)) << ", "
- << "&" << constName << "_indices[0], "
- << constName << "_indices.size()"
- << ");";
+ << constName << "_indices);";
} else if (CE->isCast()) {
printConstant(CE->getOperand(0));
Out << "Constant* " << constName << " = ConstantExpr::getCast(";
@@ -988,7 +993,7 @@ void CppWriter::printVariableBody(const GlobalVariable *GV) {
}
}
-std::string CppWriter::getOpName(Value* V) {
+std::string CppWriter::getOpName(const Value* V) {
if (!isa<Instruction>(V) || DefinedValues.find(V) != DefinedValues.end())
return getCppName(V);
@@ -1053,14 +1058,17 @@ void CppWriter::printInstruction(const Instruction *I,
case Instruction::Switch: {
const SwitchInst *SI = cast<SwitchInst>(I);
Out << "SwitchInst* " << iName << " = SwitchInst::Create("
- << opNames[0] << ", "
- << opNames[1] << ", "
+ << getOpName(SI->getCondition()) << ", "
+ << getOpName(SI->getDefaultDest()) << ", "
<< SI->getNumCases() << ", " << bbname << ");";
nl(Out);
- for (unsigned i = 2; i != SI->getNumOperands(); i += 2) {
+ unsigned NumCases = SI->getNumCases();
+ for (unsigned i = 1; i < NumCases; ++i) {
+ const ConstantInt* CaseVal = SI->getCaseValue(i);
+ const BasicBlock* BB = SI->getSuccessor(i);
Out << iName << "->addCase("
- << opNames[i] << ", "
- << opNames[i+1] << ");";
+ << getOpName(CaseVal) << ", "
+ << getOpName(BB) << ");";
nl(Out);
}
break;
@@ -1076,6 +1084,11 @@ void CppWriter::printInstruction(const Instruction *I,
}
break;
}
+ case Instruction::Resume: {
+ Out << "ResumeInst::Create(mod->getContext(), " << opNames[0]
+ << ", " << bbname << ");";
+ break;
+ }
case Instruction::Invoke: {
const InvokeInst* inv = cast<InvokeInst>(I);
Out << "std::vector<Value*> " << iName << "_params;";
@@ -1090,8 +1103,7 @@ void CppWriter::printInstruction(const Instruction *I,
<< getOpName(inv->getCalledFunction()) << ", "
<< getOpName(inv->getNormalDest()) << ", "
<< getOpName(inv->getUnwindDest()) << ", "
- << iName << "_params.begin(), "
- << iName << "_params.end(), \"";
+ << iName << "_params, \"";
printEscapedString(inv->getName());
Out << "\", " << bbname << ");";
nl(Out) << iName << "->setCallingConv(";
@@ -1252,8 +1264,7 @@ void CppWriter::printInstruction(const Instruction *I,
nl(Out);
}
Out << "Instruction* " << iName << " = GetElementPtrInst::Create("
- << opNames[0] << ", " << iName << "_indices.begin(), "
- << iName << "_indices.end()";
+ << opNames[0] << ", " << iName << "_indices";
}
Out << ", \"";
printEscapedString(gep->getName());
@@ -1304,7 +1315,7 @@ void CppWriter::printInstruction(const Instruction *I,
case Instruction::PtrToInt: Out << "PtrToIntInst"; break;
case Instruction::IntToPtr: Out << "IntToPtrInst"; break;
case Instruction::BitCast: Out << "BitCastInst"; break;
- default: assert(!"Unreachable"); break;
+ default: assert(0 && "Unreachable"); break;
}
Out << "(" << opNames[0] << ", "
<< getCppName(cst->getType()) << ", \"";
@@ -1331,8 +1342,7 @@ void CppWriter::printInstruction(const Instruction *I,
}
Out << "CallInst* " << iName << " = CallInst::Create("
<< opNames[call->getNumArgOperands()] << ", "
- << iName << "_params.begin(), "
- << iName << "_params.end(), \"";
+ << iName << "_params, \"";
} else if (call->getNumArgOperands() == 1) {
Out << "CallInst* " << iName << " = CallInst::Create("
<< opNames[call->getNumArgOperands()] << ", " << opNames[0] << ", \"";
@@ -1415,7 +1425,7 @@ void CppWriter::printInstruction(const Instruction *I,
Out << "ExtractValueInst* " << getCppName(evi)
<< " = ExtractValueInst::Create(" << opNames[0]
<< ", "
- << iName << "_indices.begin(), " << iName << "_indices.end(), \"";
+ << iName << "_indices, \"";
printEscapedString(evi->getName());
Out << "\", " << bbname << ");";
break;
@@ -1432,7 +1442,7 @@ void CppWriter::printInstruction(const Instruction *I,
Out << "InsertValueInst* " << getCppName(ivi)
<< " = InsertValueInst::Create(" << opNames[0]
<< ", " << opNames[1] << ", "
- << iName << "_indices.begin(), " << iName << "_indices.end(), \"";
+ << iName << "_indices, \"";
printEscapedString(ivi->getName());
Out << "\", " << bbname << ");";
break;
@@ -1542,13 +1552,12 @@ void CppWriter::printFunctionUses(const Function* F) {
void CppWriter::printFunctionHead(const Function* F) {
nl(Out) << "Function* " << getCppName(F);
- if (is_inline) {
- Out << " = mod->getFunction(\"";
- printEscapedString(F->getName());
- Out << "\", " << getCppName(F->getFunctionType()) << ");";
- nl(Out) << "if (!" << getCppName(F) << ") {";
- nl(Out) << getCppName(F);
- }
+ Out << " = mod->getFunction(\"";
+ printEscapedString(F->getName());
+ Out << "\");";
+ nl(Out) << "if (!" << getCppName(F) << ") {";
+ nl(Out) << getCppName(F);
+
Out<< " = Function::Create(";
nl(Out,1) << "/*Type=*/" << getCppName(F->getFunctionType()) << ",";
nl(Out) << "/*Linkage=*/";
@@ -1585,10 +1594,8 @@ void CppWriter::printFunctionHead(const Function* F) {
Out << "->setGC(\"" << F->getGC() << "\");";
nl(Out);
}
- if (is_inline) {
- Out << "}";
- nl(Out);
- }
+ Out << "}";
+ nl(Out);
printAttributes(F->getAttributes(), getCppName(F));
printCppName(F);
Out << "->setAttributes(" << getCppName(F) << "_PAL);";
@@ -1873,7 +1880,7 @@ void CppWriter::printVariable(const std::string& fname,
void CppWriter::printType(const std::string &fname,
const std::string &typeName) {
- const Type* Ty = TheModule->getTypeByName(typeName);
+ Type* Ty = TheModule->getTypeByName(typeName);
if (!Ty) {
error(std::string("Type '") + typeName + "' not found in input module");
return;
diff --git a/contrib/llvm/lib/Target/CppBackend/CPPTargetMachine.h b/contrib/llvm/lib/Target/CppBackend/CPPTargetMachine.h
index 7322e3e..287e537 100644
--- a/contrib/llvm/lib/Target/CppBackend/CPPTargetMachine.h
+++ b/contrib/llvm/lib/Target/CppBackend/CPPTargetMachine.h
@@ -22,8 +22,9 @@ namespace llvm {
class formatted_raw_ostream;
struct CPPTargetMachine : public TargetMachine {
- CPPTargetMachine(const Target &T, const std::string &TT,
- const std::string &CPU, const std::string &FS)
+ CPPTargetMachine(const Target &T, StringRef TT,
+ StringRef CPU, StringRef FS,
+ Reloc::Model RM, CodeModel::Model CM)
: TargetMachine(T, TT, CPU, FS) {}
virtual bool addPassesToEmitFile(PassManagerBase &PM,
diff --git a/contrib/llvm/lib/Target/CppBackend/TargetInfo/CppBackendTargetInfo.cpp b/contrib/llvm/lib/Target/CppBackend/TargetInfo/CppBackendTargetInfo.cpp
index d0aeb12..a8ac0a2 100644
--- a/contrib/llvm/lib/Target/CppBackend/TargetInfo/CppBackendTargetInfo.cpp
+++ b/contrib/llvm/lib/Target/CppBackend/TargetInfo/CppBackendTargetInfo.cpp
@@ -9,7 +9,7 @@
#include "CPPTargetMachine.h"
#include "llvm/Module.h"
-#include "llvm/Target/TargetRegistry.h"
+#include "llvm/Support/TargetRegistry.h"
using namespace llvm;
Target llvm::TheCppBackendTarget;
@@ -24,3 +24,5 @@ extern "C" void LLVMInitializeCppBackendTargetInfo() {
"C++ backend",
&CppBackend_TripleMatchQuality);
}
+
+extern "C" void LLVMInitializeCppBackendTargetMC() {}
diff --git a/contrib/llvm/lib/Target/MBlaze/AsmParser/CMakeLists.txt b/contrib/llvm/lib/Target/MBlaze/AsmParser/CMakeLists.txt
deleted file mode 100644
index 87e7cb5..0000000
--- a/contrib/llvm/lib/Target/MBlaze/AsmParser/CMakeLists.txt
+++ /dev/null
@@ -1,8 +0,0 @@
-include_directories( ${CMAKE_CURRENT_BINARY_DIR}/..
- ${CMAKE_CURRENT_SOURCE_DIR}/.. )
-
-add_llvm_library(LLVMMBlazeAsmParser
- MBlazeAsmLexer.cpp
- MBlazeAsmParser.cpp
- )
-
diff --git a/contrib/llvm/lib/Target/MBlaze/AsmParser/MBlazeAsmLexer.cpp b/contrib/llvm/lib/Target/MBlaze/AsmParser/MBlazeAsmLexer.cpp
index 1596596..2d357bb 100644
--- a/contrib/llvm/lib/Target/MBlaze/AsmParser/MBlazeAsmLexer.cpp
+++ b/contrib/llvm/lib/Target/MBlaze/AsmParser/MBlazeAsmLexer.cpp
@@ -7,8 +7,7 @@
//
//===----------------------------------------------------------------------===//
-#include "MBlaze.h"
-#include "MBlazeTargetMachine.h"
+#include "MCTargetDesc/MBlazeBaseInfo.h"
#include "llvm/ADT/OwningPtr.h"
#include "llvm/ADT/SmallVector.h"
@@ -17,10 +16,10 @@
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCParser/MCAsmLexer.h"
#include "llvm/MC/MCParser/MCParsedAsmOperand.h"
+#include "llvm/MC/MCRegisterInfo.h"
+#include "llvm/MC/MCTargetAsmLexer.h"
-#include "llvm/Target/TargetAsmLexer.h"
-#include "llvm/Target/TargetMachine.h"
-#include "llvm/Target/TargetRegistry.h"
+#include "llvm/Support/TargetRegistry.h"
#include <string>
#include <map>
@@ -29,7 +28,7 @@ using namespace llvm;
namespace {
- class MBlazeBaseAsmLexer : public TargetAsmLexer {
+ class MBlazeBaseAsmLexer : public MCTargetAsmLexer {
const MCAsmInfo &AsmInfo;
const AsmToken &lexDefinite() {
@@ -42,7 +41,7 @@ namespace {
rmap_ty RegisterMap;
- void InitRegisterMap(const TargetRegisterInfo *info) {
+ void InitRegisterMap(const MCRegisterInfo *info) {
unsigned numRegs = info->getNumRegs();
for (unsigned i = 0; i < numRegs; ++i) {
@@ -76,20 +75,16 @@ namespace {
}
public:
MBlazeBaseAsmLexer(const Target &T, const MCAsmInfo &MAI)
- : TargetAsmLexer(T), AsmInfo(MAI) {
+ : MCTargetAsmLexer(T), AsmInfo(MAI) {
}
};
class MBlazeAsmLexer : public MBlazeBaseAsmLexer {
public:
- MBlazeAsmLexer(const Target &T, const MCAsmInfo &MAI)
+ MBlazeAsmLexer(const Target &T, const MCRegisterInfo &MRI,
+ const MCAsmInfo &MAI)
: MBlazeBaseAsmLexer(T, MAI) {
- std::string tripleString("mblaze-unknown-unknown");
- std::string featureString;
- std::string CPU;
- OwningPtr<const TargetMachine>
- targetMachine(T.createTargetMachine(tripleString, CPU, featureString));
- InitRegisterMap(targetMachine->getRegisterInfo());
+ InitRegisterMap(&MRI);
}
};
}
@@ -123,6 +118,6 @@ AsmToken MBlazeBaseAsmLexer::LexTokenUAL() {
}
extern "C" void LLVMInitializeMBlazeAsmLexer() {
- RegisterAsmLexer<MBlazeAsmLexer> X(TheMBlazeTarget);
+ RegisterMCAsmLexer<MBlazeAsmLexer> X(TheMBlazeTarget);
}
diff --git a/contrib/llvm/lib/Target/MBlaze/AsmParser/MBlazeAsmParser.cpp b/contrib/llvm/lib/Target/MBlaze/AsmParser/MBlazeAsmParser.cpp
index eebd9d8..97d311c 100644
--- a/contrib/llvm/lib/Target/MBlaze/AsmParser/MBlazeAsmParser.cpp
+++ b/contrib/llvm/lib/Target/MBlaze/AsmParser/MBlazeAsmParser.cpp
@@ -7,19 +7,16 @@
//
//===----------------------------------------------------------------------===//
-#include "MBlaze.h"
-#include "MBlazeSubtarget.h"
-#include "MBlazeRegisterInfo.h"
-#include "MBlazeISelLowering.h"
+#include "MCTargetDesc/MBlazeBaseInfo.h"
#include "llvm/MC/MCParser/MCAsmLexer.h"
#include "llvm/MC/MCParser/MCAsmParser.h"
#include "llvm/MC/MCParser/MCParsedAsmOperand.h"
#include "llvm/MC/MCStreamer.h"
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCInst.h"
-#include "llvm/Target/TargetRegistry.h"
-#include "llvm/Target/TargetAsmParser.h"
+#include "llvm/MC/MCTargetAsmParser.h"
#include "llvm/Support/SourceMgr.h"
+#include "llvm/Support/TargetRegistry.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/ADT/OwningPtr.h"
#include "llvm/ADT/SmallVector.h"
@@ -30,7 +27,7 @@ using namespace llvm;
namespace {
struct MBlazeOperand;
-class MBlazeAsmParser : public TargetAsmParser {
+class MBlazeAsmParser : public MCTargetAsmParser {
MCAsmParser &Parser;
MCAsmParser &getParser() const { return Parser; }
@@ -64,7 +61,7 @@ class MBlazeAsmParser : public TargetAsmParser {
public:
MBlazeAsmParser(MCSubtargetInfo &_STI, MCAsmParser &_Parser)
- : TargetAsmParser(), Parser(_Parser) {}
+ : MCTargetAsmParser(), Parser(_Parser) {}
virtual bool ParseInstruction(StringRef Name, SMLoc NameLoc,
SmallVectorImpl<MCParsedAsmOperand*> &Operands);
@@ -286,19 +283,19 @@ void MBlazeOperand::print(raw_ostream &OS) const {
break;
case Register:
OS << "<register R";
- OS << MBlazeRegisterInfo::getRegisterNumbering(getReg()) << ">";
+ OS << getMBlazeRegisterNumbering(getReg()) << ">";
break;
case Token:
OS << "'" << getToken() << "'";
break;
case Memory: {
OS << "<memory R";
- OS << MBlazeRegisterInfo::getRegisterNumbering(getMemBase());
+ OS << getMBlazeRegisterNumbering(getMemBase());
OS << ", ";
unsigned RegOff = getMemOffReg();
if (RegOff)
- OS << "R" << MBlazeRegisterInfo::getRegisterNumbering(RegOff);
+ OS << "R" << getMBlazeRegisterNumbering(RegOff);
else
OS << getMemOff();
OS << ">";
@@ -326,6 +323,7 @@ MatchAndEmitInstruction(SMLoc IDLoc,
unsigned ErrorInfo;
switch (MatchInstructionImpl(Operands, Inst, ErrorInfo)) {
+ default: break;
case Match_Success:
Out.EmitInstruction(Inst);
return false;
@@ -521,7 +519,7 @@ ParseInstruction(StringRef Name, SMLoc NameLoc,
return false;
}
-/// ParseDirective parses the arm specific directives
+/// ParseDirective parses the MBlaze specific directives
bool MBlazeAsmParser::ParseDirective(AsmToken DirectiveID) {
StringRef IDVal = DirectiveID.getIdentifier();
if (IDVal == ".word")
@@ -558,7 +556,7 @@ extern "C" void LLVMInitializeMBlazeAsmLexer();
/// Force static initialization.
extern "C" void LLVMInitializeMBlazeAsmParser() {
- RegisterAsmParser<MBlazeAsmParser> X(TheMBlazeTarget);
+ RegisterMCAsmParser<MBlazeAsmParser> X(TheMBlazeTarget);
LLVMInitializeMBlazeAsmLexer();
}
diff --git a/contrib/llvm/lib/Target/MBlaze/AsmParser/Makefile b/contrib/llvm/lib/Target/MBlaze/AsmParser/Makefile
deleted file mode 100644
index 611a0f4..0000000
--- a/contrib/llvm/lib/Target/MBlaze/AsmParser/Makefile
+++ /dev/null
@@ -1,15 +0,0 @@
-##===- lib/Target/ARM/AsmParser/Makefile -------------------*- Makefile -*-===##
-#
-# The LLVM Compiler Infrastructure
-#
-# This file is distributed under the University of Illinois Open Source
-# License. See LICENSE.TXT for details.
-#
-##===----------------------------------------------------------------------===##
-LEVEL = ../../../..
-LIBRARYNAME = LLVMMBlazeAsmParser
-
-# Hack: we need to include 'main' MBlaze target directory for private headers
-CPP.Flags += -I$(PROJ_OBJ_DIR)/.. -I$(PROJ_SRC_DIR)/..
-
-include $(LEVEL)/Makefile.common
diff --git a/contrib/llvm/lib/Target/MBlaze/Disassembler/CMakeLists.txt b/contrib/llvm/lib/Target/MBlaze/Disassembler/CMakeLists.txt
deleted file mode 100644
index 9376e68..0000000
--- a/contrib/llvm/lib/Target/MBlaze/Disassembler/CMakeLists.txt
+++ /dev/null
@@ -1,16 +0,0 @@
-include_directories( ${CMAKE_CURRENT_BINARY_DIR}/..
- ${CMAKE_CURRENT_SOURCE_DIR}/.. )
-
-add_llvm_library(LLVMMBlazeDisassembler
- MBlazeDisassembler.cpp
- )
-
-# workaround for hanging compilation on MSVC9 and 10
-if( MSVC_VERSION EQUAL 1500 OR MSVC_VERSION EQUAL 1600 )
-set_property(
- SOURCE MBlazeDisassembler.cpp
- PROPERTY COMPILE_FLAGS "/Od"
- )
-endif()
-
-add_dependencies(LLVMMBlazeDisassembler MBlazeCodeGenTable_gen)
diff --git a/contrib/llvm/lib/Target/MBlaze/Disassembler/MBlazeDisassembler.cpp b/contrib/llvm/lib/Target/MBlaze/Disassembler/MBlazeDisassembler.cpp
index 88d80a1..fd761f1 100644
--- a/contrib/llvm/lib/Target/MBlaze/Disassembler/MBlazeDisassembler.cpp
+++ b/contrib/llvm/lib/Target/MBlaze/Disassembler/MBlazeDisassembler.cpp
@@ -20,9 +20,9 @@
#include "llvm/MC/MCDisassembler.h"
#include "llvm/MC/MCDisassembler.h"
#include "llvm/MC/MCInst.h"
-#include "llvm/Target/TargetRegistry.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/MemoryObject.h"
+#include "llvm/Support/TargetRegistry.h"
#include "llvm/Support/raw_ostream.h"
// #include "MBlazeGenDecoderTables.inc"
@@ -60,27 +60,27 @@ static unsigned mblazeBinary2Opcode[] = {
};
static unsigned getRD(uint32_t insn) {
- if (!MBlazeRegisterInfo::isRegister((insn>>21)&0x1F))
+ if (!isMBlazeRegister((insn>>21)&0x1F))
return UNSUPPORTED;
- return MBlazeRegisterInfo::getRegisterFromNumbering((insn>>21)&0x1F);
+ return getMBlazeRegisterFromNumbering((insn>>21)&0x1F);
}
static unsigned getRA(uint32_t insn) {
- if (!MBlazeRegisterInfo::getRegisterFromNumbering((insn>>16)&0x1F))
+ if (!getMBlazeRegisterFromNumbering((insn>>16)&0x1F))
return UNSUPPORTED;
- return MBlazeRegisterInfo::getRegisterFromNumbering((insn>>16)&0x1F);
+ return getMBlazeRegisterFromNumbering((insn>>16)&0x1F);
}
static unsigned getRB(uint32_t insn) {
- if (!MBlazeRegisterInfo::getRegisterFromNumbering((insn>>11)&0x1F))
+ if (!getMBlazeRegisterFromNumbering((insn>>11)&0x1F))
return UNSUPPORTED;
- return MBlazeRegisterInfo::getRegisterFromNumbering((insn>>11)&0x1F);
+ return getMBlazeRegisterFromNumbering((insn>>11)&0x1F);
}
static int64_t getRS(uint32_t insn) {
- if (!MBlazeRegisterInfo::isSpecialRegister(insn&0x3FFF))
+ if (!isSpecialMBlazeRegister(insn&0x3FFF))
return UNSUPPORTED;
- return MBlazeRegisterInfo::getSpecialRegisterFromNumbering(insn&0x3FFF);
+ return getSpecialMBlazeRegisterFromNumbering(insn&0x3FFF);
}
static int64_t getIMM(uint32_t insn) {
@@ -493,11 +493,12 @@ EDInstInfo *MBlazeDisassembler::getEDInfo() const {
// Public interface for the disassembler
//
-bool MBlazeDisassembler::getInstruction(MCInst &instr,
+MCDisassembler::DecodeStatus MBlazeDisassembler::getInstruction(MCInst &instr,
uint64_t &size,
const MemoryObject &region,
uint64_t address,
- raw_ostream &vStream) const {
+ raw_ostream &vStream,
+ raw_ostream &cStream) const {
// The machine instruction.
uint32_t insn;
uint64_t read;
@@ -508,7 +509,7 @@ bool MBlazeDisassembler::getInstruction(MCInst &instr,
// We want to read exactly 4 bytes of data.
if (region.readBytes(address, 4, (uint8_t*)bytes, &read) == -1 || read < 4)
- return false;
+ return Fail;
// Encoded as a big-endian 32-bit word in the stream.
insn = (bytes[0]<<24) | (bytes[1]<<16) | (bytes[2]<< 8) | (bytes[3]<<0);
@@ -517,7 +518,7 @@ bool MBlazeDisassembler::getInstruction(MCInst &instr,
// that it is a valid instruction.
unsigned opcode = getOPCODE(insn);
if (opcode == UNSUPPORTED)
- return false;
+ return Fail;
instr.setOpcode(opcode);
@@ -529,11 +530,11 @@ bool MBlazeDisassembler::getInstruction(MCInst &instr,
uint64_t tsFlags = MBlazeInsts[opcode].TSFlags;
switch ((tsFlags & MBlazeII::FormMask)) {
default:
- return false;
+ return Fail;
case MBlazeII::FRRRR:
if (RD == UNSUPPORTED || RA == UNSUPPORTED || RB == UNSUPPORTED)
- return false;
+ return Fail;
instr.addOperand(MCOperand::CreateReg(RD));
instr.addOperand(MCOperand::CreateReg(RB));
instr.addOperand(MCOperand::CreateReg(RA));
@@ -541,7 +542,7 @@ bool MBlazeDisassembler::getInstruction(MCInst &instr,
case MBlazeII::FRRR:
if (RD == UNSUPPORTED || RA == UNSUPPORTED || RB == UNSUPPORTED)
- return false;
+ return Fail;
instr.addOperand(MCOperand::CreateReg(RD));
instr.addOperand(MCOperand::CreateReg(RA));
instr.addOperand(MCOperand::CreateReg(RB));
@@ -550,23 +551,23 @@ bool MBlazeDisassembler::getInstruction(MCInst &instr,
case MBlazeII::FRI:
switch (opcode) {
default:
- return false;
+ return Fail;
case MBlaze::MFS:
if (RD == UNSUPPORTED)
- return false;
+ return Fail;
instr.addOperand(MCOperand::CreateReg(RD));
instr.addOperand(MCOperand::CreateImm(insn&0x3FFF));
break;
case MBlaze::MTS:
if (RA == UNSUPPORTED)
- return false;
+ return Fail;
instr.addOperand(MCOperand::CreateImm(insn&0x3FFF));
instr.addOperand(MCOperand::CreateReg(RA));
break;
case MBlaze::MSRSET:
case MBlaze::MSRCLR:
if (RD == UNSUPPORTED)
- return false;
+ return Fail;
instr.addOperand(MCOperand::CreateReg(RD));
instr.addOperand(MCOperand::CreateImm(insn&0x7FFF));
break;
@@ -575,7 +576,7 @@ bool MBlazeDisassembler::getInstruction(MCInst &instr,
case MBlazeII::FRRI:
if (RD == UNSUPPORTED || RA == UNSUPPORTED)
- return false;
+ return Fail;
instr.addOperand(MCOperand::CreateReg(RD));
instr.addOperand(MCOperand::CreateReg(RA));
switch (opcode) {
@@ -592,35 +593,35 @@ bool MBlazeDisassembler::getInstruction(MCInst &instr,
case MBlazeII::FCRR:
if (RA == UNSUPPORTED || RB == UNSUPPORTED)
- return false;
+ return Fail;
instr.addOperand(MCOperand::CreateReg(RA));
instr.addOperand(MCOperand::CreateReg(RB));
break;
case MBlazeII::FCRI:
if (RA == UNSUPPORTED)
- return false;
+ return Fail;
instr.addOperand(MCOperand::CreateReg(RA));
instr.addOperand(MCOperand::CreateImm(getIMM(insn)));
break;
case MBlazeII::FRCR:
if (RD == UNSUPPORTED || RB == UNSUPPORTED)
- return false;
+ return Fail;
instr.addOperand(MCOperand::CreateReg(RD));
instr.addOperand(MCOperand::CreateReg(RB));
break;
case MBlazeII::FRCI:
if (RD == UNSUPPORTED)
- return false;
+ return Fail;
instr.addOperand(MCOperand::CreateReg(RD));
instr.addOperand(MCOperand::CreateImm(getIMM(insn)));
break;
case MBlazeII::FCCR:
if (RB == UNSUPPORTED)
- return false;
+ return Fail;
instr.addOperand(MCOperand::CreateReg(RB));
break;
@@ -630,7 +631,7 @@ bool MBlazeDisassembler::getInstruction(MCInst &instr,
case MBlazeII::FRRCI:
if (RD == UNSUPPORTED || RA == UNSUPPORTED)
- return false;
+ return Fail;
instr.addOperand(MCOperand::CreateReg(RD));
instr.addOperand(MCOperand::CreateReg(RA));
instr.addOperand(MCOperand::CreateImm(getSHT(insn)));
@@ -638,35 +639,35 @@ bool MBlazeDisassembler::getInstruction(MCInst &instr,
case MBlazeII::FRRC:
if (RD == UNSUPPORTED || RA == UNSUPPORTED)
- return false;
+ return Fail;
instr.addOperand(MCOperand::CreateReg(RD));
instr.addOperand(MCOperand::CreateReg(RA));
break;
case MBlazeII::FRCX:
if (RD == UNSUPPORTED)
- return false;
+ return Fail;
instr.addOperand(MCOperand::CreateReg(RD));
instr.addOperand(MCOperand::CreateImm(getFSL(insn)));
break;
case MBlazeII::FRCS:
if (RD == UNSUPPORTED || RS == UNSUPPORTED)
- return false;
+ return Fail;
instr.addOperand(MCOperand::CreateReg(RD));
instr.addOperand(MCOperand::CreateReg(RS));
break;
case MBlazeII::FCRCS:
if (RS == UNSUPPORTED || RA == UNSUPPORTED)
- return false;
+ return Fail;
instr.addOperand(MCOperand::CreateReg(RS));
instr.addOperand(MCOperand::CreateReg(RA));
break;
case MBlazeII::FCRCX:
if (RA == UNSUPPORTED)
- return false;
+ return Fail;
instr.addOperand(MCOperand::CreateReg(RA));
instr.addOperand(MCOperand::CreateImm(getFSL(insn)));
break;
@@ -677,13 +678,13 @@ bool MBlazeDisassembler::getInstruction(MCInst &instr,
case MBlazeII::FCR:
if (RB == UNSUPPORTED)
- return false;
+ return Fail;
instr.addOperand(MCOperand::CreateReg(RB));
break;
case MBlazeII::FRIR:
if (RD == UNSUPPORTED || RA == UNSUPPORTED)
- return false;
+ return Fail;
instr.addOperand(MCOperand::CreateReg(RD));
instr.addOperand(MCOperand::CreateImm(getIMM(insn)));
instr.addOperand(MCOperand::CreateReg(RA));
@@ -693,11 +694,12 @@ bool MBlazeDisassembler::getInstruction(MCInst &instr,
// We always consume 4 bytes of data on success
size = 4;
- return true;
+ return Success;
}
-static MCDisassembler *createMBlazeDisassembler(const Target &T) {
- return new MBlazeDisassembler;
+static MCDisassembler *createMBlazeDisassembler(const Target &T,
+ const MCSubtargetInfo &STI) {
+ return new MBlazeDisassembler(STI);
}
extern "C" void LLVMInitializeMBlazeDisassembler() {
diff --git a/contrib/llvm/lib/Target/MBlaze/Disassembler/MBlazeDisassembler.h b/contrib/llvm/lib/Target/MBlaze/Disassembler/MBlazeDisassembler.h
index d05eced..0ac0d89 100644
--- a/contrib/llvm/lib/Target/MBlaze/Disassembler/MBlazeDisassembler.h
+++ b/contrib/llvm/lib/Target/MBlaze/Disassembler/MBlazeDisassembler.h
@@ -32,19 +32,20 @@ class MBlazeDisassembler : public MCDisassembler {
public:
/// Constructor - Initializes the disassembler.
///
- MBlazeDisassembler() :
- MCDisassembler() {
+ MBlazeDisassembler(const MCSubtargetInfo &STI) :
+ MCDisassembler(STI) {
}
~MBlazeDisassembler() {
}
/// getInstruction - See MCDisassembler.
- bool getInstruction(MCInst &instr,
+ MCDisassembler::DecodeStatus getInstruction(MCInst &instr,
uint64_t &size,
const MemoryObject &region,
uint64_t address,
- raw_ostream &vStream) const;
+ raw_ostream &vStream,
+ raw_ostream &cStream) const;
/// getEDInfo - See MCDisassembler.
EDInstInfo *getEDInfo() const;
diff --git a/contrib/llvm/lib/Target/MBlaze/Disassembler/Makefile b/contrib/llvm/lib/Target/MBlaze/Disassembler/Makefile
deleted file mode 100644
index 0530b32..0000000
--- a/contrib/llvm/lib/Target/MBlaze/Disassembler/Makefile
+++ /dev/null
@@ -1,16 +0,0 @@
-##===- lib/Target/MBlaze/Disassembler/Makefile -------------*- Makefile -*-===##
-#
-# The LLVM Compiler Infrastructure
-#
-# This file is distributed under the University of Illinois Open Source
-# License. See LICENSE.TXT for details.
-#
-##===----------------------------------------------------------------------===##
-
-LEVEL = ../../../..
-LIBRARYNAME = LLVMMBlazeDisassembler
-
-# Hack: we need to include 'main' MBlaze target directory to grab headers
-CPP.Flags += -I$(PROJ_OBJ_DIR)/.. -I$(PROJ_SRC_DIR)/..
-
-include $(LEVEL)/Makefile.common
diff --git a/contrib/llvm/lib/Target/MBlaze/InstPrinter/CMakeLists.txt b/contrib/llvm/lib/Target/MBlaze/InstPrinter/CMakeLists.txt
deleted file mode 100644
index 242a573..0000000
--- a/contrib/llvm/lib/Target/MBlaze/InstPrinter/CMakeLists.txt
+++ /dev/null
@@ -1,8 +0,0 @@
-include_directories( ${CMAKE_CURRENT_BINARY_DIR}/..
- ${CMAKE_CURRENT_SOURCE_DIR}/.. )
-
-add_llvm_library(LLVMMBlazeAsmPrinter
- MBlazeInstPrinter.cpp
- )
-
-add_dependencies(LLVMMBlazeAsmPrinter MBlazeCodeGenTable_gen)
diff --git a/contrib/llvm/lib/Target/MBlaze/InstPrinter/MBlazeInstPrinter.cpp b/contrib/llvm/lib/Target/MBlaze/InstPrinter/MBlazeInstPrinter.cpp
index a7fd287..a1f1dbc 100644
--- a/contrib/llvm/lib/Target/MBlaze/InstPrinter/MBlazeInstPrinter.cpp
+++ b/contrib/llvm/lib/Target/MBlaze/InstPrinter/MBlazeInstPrinter.cpp
@@ -25,8 +25,10 @@ using namespace llvm;
// Include the auto-generated portion of the assembly writer.
#include "MBlazeGenAsmWriter.inc"
-void MBlazeInstPrinter::printInst(const MCInst *MI, raw_ostream &O) {
+void MBlazeInstPrinter::printInst(const MCInst *MI, raw_ostream &O,
+ StringRef Annot) {
printInstruction(MI, O);
+ printAnnotation(O, Annot);
}
void MBlazeInstPrinter::printOperand(const MCInst *MI, unsigned OpNo,
diff --git a/contrib/llvm/lib/Target/MBlaze/InstPrinter/MBlazeInstPrinter.h b/contrib/llvm/lib/Target/MBlaze/InstPrinter/MBlazeInstPrinter.h
index eacca41..570ab08 100644
--- a/contrib/llvm/lib/Target/MBlaze/InstPrinter/MBlazeInstPrinter.h
+++ b/contrib/llvm/lib/Target/MBlaze/InstPrinter/MBlazeInstPrinter.h
@@ -24,7 +24,7 @@ namespace llvm {
MBlazeInstPrinter(const MCAsmInfo &MAI)
: MCInstPrinter(MAI) {}
- virtual void printInst(const MCInst *MI, raw_ostream &O);
+ virtual void printInst(const MCInst *MI, raw_ostream &O, StringRef Annot);
// Autogenerated by tblgen.
void printInstruction(const MCInst *MI, raw_ostream &O);
diff --git a/contrib/llvm/lib/Target/MBlaze/InstPrinter/Makefile b/contrib/llvm/lib/Target/MBlaze/InstPrinter/Makefile
deleted file mode 100644
index 9fb6e86..0000000
--- a/contrib/llvm/lib/Target/MBlaze/InstPrinter/Makefile
+++ /dev/null
@@ -1,16 +0,0 @@
-##===- lib/Target/MBlaze/AsmPrinter/Makefile ---------------*- Makefile -*-===##
-#
-# The LLVM Compiler Infrastructure
-#
-# This file is distributed under the University of Illinois Open Source
-# License. See LICENSE.TXT for details.
-#
-##===----------------------------------------------------------------------===##
-LEVEL = ../../../..
-LIBRARYNAME = LLVMMBlazeAsmPrinter
-
-# Hack: we need to include 'main' MBlaze target directory to grab
-# private headers
-CPP.Flags += -I$(PROJ_OBJ_DIR)/.. -I$(PROJ_SRC_DIR)/..
-
-include $(LEVEL)/Makefile.common
diff --git a/contrib/llvm/lib/Target/MBlaze/MBlaze.h b/contrib/llvm/lib/Target/MBlaze/MBlaze.h
index 3390794..1399b85 100644
--- a/contrib/llvm/lib/Target/MBlaze/MBlaze.h
+++ b/contrib/llvm/lib/Target/MBlaze/MBlaze.h
@@ -15,6 +15,7 @@
#ifndef TARGET_MBLAZE_H
#define TARGET_MBLAZE_H
+#include "MCTargetDesc/MBlazeBaseInfo.h"
#include "MCTargetDesc/MBlazeMCTargetDesc.h"
#include "llvm/Target/TargetMachine.h"
@@ -22,17 +23,6 @@ namespace llvm {
class MBlazeTargetMachine;
class FunctionPass;
class MachineCodeEmitter;
- class MCCodeEmitter;
- class MCInstrInfo;
- class MCSubtargetInfo;
- class TargetAsmBackend;
- class formatted_raw_ostream;
-
- MCCodeEmitter *createMBlazeMCCodeEmitter(const MCInstrInfo &MCII,
- const MCSubtargetInfo &STI,
- MCContext &Ctx);
-
- TargetAsmBackend *createMBlazeAsmBackend(const Target &, const std::string &);
FunctionPass *createMBlazeISelDag(MBlazeTargetMachine &TM);
FunctionPass *createMBlazeDelaySlotFillerPass(MBlazeTargetMachine &TM);
diff --git a/contrib/llvm/lib/Target/MBlaze/MBlazeAsmPrinter.cpp b/contrib/llvm/lib/Target/MBlaze/MBlazeAsmPrinter.cpp
index 0016df5..97bd083 100644
--- a/contrib/llvm/lib/Target/MBlaze/MBlazeAsmPrinter.cpp
+++ b/contrib/llvm/lib/Target/MBlaze/MBlazeAsmPrinter.cpp
@@ -38,10 +38,10 @@
#include "llvm/Target/TargetLoweringObjectFile.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetOptions.h"
-#include "llvm/Target/TargetRegistry.h"
#include "llvm/ADT/SmallString.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/TargetRegistry.h"
#include "llvm/Support/raw_ostream.h"
#include <cctype>
@@ -136,19 +136,17 @@ void MBlazeAsmPrinter::printSavedRegsBitmask() {
const std::vector<CalleeSavedInfo> &CSI = MFI->getCalleeSavedInfo();
for (unsigned i = 0, e = CSI.size(); i != e; ++i) {
unsigned Reg = CSI[i].getReg();
- unsigned RegNum = MBlazeRegisterInfo::getRegisterNumbering(Reg);
+ unsigned RegNum = getMBlazeRegisterNumbering(Reg);
if (MBlaze::GPRRegisterClass->contains(Reg))
CPUBitmask |= (1 << RegNum);
}
// Return Address and Frame registers must also be set in CPUBitmask.
if (TFI->hasFP(*MF))
- CPUBitmask |= (1 << MBlazeRegisterInfo::
- getRegisterNumbering(RI.getFrameRegister(*MF)));
+ CPUBitmask |= (1 << getMBlazeRegisterNumbering(RI.getFrameRegister(*MF)));
if (MFI->adjustsStack())
- CPUBitmask |= (1 << MBlazeRegisterInfo::
- getRegisterNumbering(RI.getRARegister()));
+ CPUBitmask |= (1 << getMBlazeRegisterNumbering(RI.getRARegister()));
// Print CPUBitmask
OutStreamer.EmitRawText("\t.mask\t0x" + Twine::utohexstr(CPUBitmask));
@@ -318,18 +316,7 @@ isBlockOnlyReachableByFallthrough(const MachineBasicBlock *MBB) const {
return I == Pred->end() || !I->getDesc().isBarrier();
}
-static MCInstPrinter *createMBlazeMCInstPrinter(const Target &T,
- unsigned SyntaxVariant,
- const MCAsmInfo &MAI) {
- if (SyntaxVariant == 0)
- return new MBlazeInstPrinter(MAI);
- return 0;
-}
-
// Force static initialization.
extern "C" void LLVMInitializeMBlazeAsmPrinter() {
RegisterAsmPrinter<MBlazeAsmPrinter> X(TheMBlazeTarget);
- TargetRegistry::RegisterMCInstPrinter(TheMBlazeTarget,
- createMBlazeMCInstPrinter);
-
}
diff --git a/contrib/llvm/lib/Target/MBlaze/MBlazeFrameLowering.cpp b/contrib/llvm/lib/Target/MBlaze/MBlazeFrameLowering.cpp
index e763902..f28d5a7 100644
--- a/contrib/llvm/lib/Target/MBlaze/MBlazeFrameLowering.cpp
+++ b/contrib/llvm/lib/Target/MBlaze/MBlazeFrameLowering.cpp
@@ -1,4 +1,4 @@
-//=======- MBlazeFrameLowering.cpp - MBlaze Frame Information ------*- C++ -*-====//
+//===- MBlazeFrameLowering.cpp - MBlaze Frame Information ------*- C++ -*-====//
//
// The LLVM Compiler Infrastructure
//
diff --git a/contrib/llvm/lib/Target/MBlaze/MBlazeISelLowering.cpp b/contrib/llvm/lib/Target/MBlaze/MBlazeISelLowering.cpp
index 62dfdcc..8ec548f 100644
--- a/contrib/llvm/lib/Target/MBlaze/MBlazeISelLowering.cpp
+++ b/contrib/llvm/lib/Target/MBlaze/MBlazeISelLowering.cpp
@@ -59,6 +59,7 @@ MBlazeTargetLowering::MBlazeTargetLowering(MBlazeTargetMachine &TM)
// MBlaze does not have i1 type, so use i32 for
// setcc operations results (slt, sgt, ...).
setBooleanContents(ZeroOrOneBooleanContent);
+ setBooleanVectorContents(ZeroOrOneBooleanContent); // FIXME: Is this correct?
// Set up the register classes
addRegisterClass(MVT::i32, MBlaze::GPRRegisterClass);
@@ -187,7 +188,7 @@ MBlazeTargetLowering::MBlazeTargetLowering(MBlazeTargetMachine &TM)
computeRegisterProperties();
}
-MVT::SimpleValueType MBlazeTargetLowering::getSetCCResultType(EVT VT) const {
+EVT MBlazeTargetLowering::getSetCCResultType(EVT VT) const {
return MVT::i32;
}
@@ -964,13 +965,13 @@ LowerFormalArguments(SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
// The last register argument that must be saved is MBlaze::R10
TargetRegisterClass *RC = MBlaze::GPRRegisterClass;
- unsigned Begin = MBlazeRegisterInfo::getRegisterNumbering(MBlaze::R5);
- unsigned Start = MBlazeRegisterInfo::getRegisterNumbering(ArgRegEnd+1);
- unsigned End = MBlazeRegisterInfo::getRegisterNumbering(MBlaze::R10);
+ unsigned Begin = getMBlazeRegisterNumbering(MBlaze::R5);
+ unsigned Start = getMBlazeRegisterNumbering(ArgRegEnd+1);
+ unsigned End = getMBlazeRegisterNumbering(MBlaze::R10);
unsigned StackLoc = Start - Begin + 1;
for (; Start <= End; ++Start, ++StackLoc) {
- unsigned Reg = MBlazeRegisterInfo::getRegisterFromNumbering(Start);
+ unsigned Reg = getMBlazeRegisterFromNumbering(Start);
unsigned LiveReg = MF.addLiveIn(Reg, RC);
SDValue ArgValue = DAG.getCopyFromReg(Chain, dl, LiveReg, MVT::i32);
@@ -1096,7 +1097,7 @@ MBlazeTargetLowering::getSingleConstraintMatchWeight(
// but allow it at the lowest weight.
if (CallOperandVal == NULL)
return CW_Default;
- const Type *type = CallOperandVal->getType();
+ Type *type = CallOperandVal->getType();
// Look at the constraint type.
switch (*constraint) {
default:
diff --git a/contrib/llvm/lib/Target/MBlaze/MBlazeISelLowering.h b/contrib/llvm/lib/Target/MBlaze/MBlazeISelLowering.h
index bb128da..8b49bc3 100644
--- a/contrib/llvm/lib/Target/MBlaze/MBlazeISelLowering.h
+++ b/contrib/llvm/lib/Target/MBlaze/MBlazeISelLowering.h
@@ -102,7 +102,7 @@ namespace llvm {
virtual const char *getTargetNodeName(unsigned Opcode) const;
/// getSetCCResultType - get the ISD::SETCC result ValueType
- MVT::SimpleValueType getSetCCResultType(EVT VT) const;
+ EVT getSetCCResultType(EVT VT) const;
private:
// Subtarget Info
diff --git a/contrib/llvm/lib/Target/MBlaze/MBlazeInstrInfo.cpp b/contrib/llvm/lib/Target/MBlaze/MBlazeInstrInfo.cpp
index 188f10a..7ae05b3 100644
--- a/contrib/llvm/lib/Target/MBlaze/MBlazeInstrInfo.cpp
+++ b/contrib/llvm/lib/Target/MBlaze/MBlazeInstrInfo.cpp
@@ -17,9 +17,9 @@
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/ScoreboardHazardRecognizer.h"
-#include "llvm/Target/TargetRegistry.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/TargetRegistry.h"
#include "llvm/ADT/STLExtras.h"
#define GET_INSTRINFO_CTOR
@@ -239,7 +239,8 @@ unsigned MBlazeInstrInfo::RemoveBranch(MachineBasicBlock &MBB) const {
return 2;
}
-bool MBlazeInstrInfo::ReverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const {
+bool MBlazeInstrInfo::ReverseBranchCondition(SmallVectorImpl<MachineOperand>
+ &Cond) const {
assert(Cond.size() == 2 && "Invalid MBlaze branch opcode!");
switch (Cond[0].getImm()) {
default: return true;
diff --git a/contrib/llvm/lib/Target/MBlaze/MBlazeInstrInfo.h b/contrib/llvm/lib/Target/MBlaze/MBlazeInstrInfo.h
index 79f962b..7174405 100644
--- a/contrib/llvm/lib/Target/MBlaze/MBlazeInstrInfo.h
+++ b/contrib/llvm/lib/Target/MBlaze/MBlazeInstrInfo.h
@@ -166,62 +166,6 @@ namespace MBlaze {
}
}
-/// MBlazeII - This namespace holds all of the target specific flags that
-/// instruction info tracks.
-///
-namespace MBlazeII {
- enum {
- // PseudoFrm - This represents an instruction that is a pseudo instruction
- // or one that has not been implemented yet. It is illegal to code generate
- // it, but tolerated for intermediate implementation stages.
- FPseudo = 0,
- FRRR,
- FRRI,
- FCRR,
- FCRI,
- FRCR,
- FRCI,
- FCCR,
- FCCI,
- FRRCI,
- FRRC,
- FRCX,
- FRCS,
- FCRCS,
- FCRCX,
- FCX,
- FCR,
- FRIR,
- FRRRR,
- FRI,
- FC,
- FormMask = 63
-
- //===------------------------------------------------------------------===//
- // MBlaze Specific MachineOperand flags.
- // MO_NO_FLAG,
-
- /// MO_GOT - Represents the offset into the global offset table at which
- /// the address the relocation entry symbol resides during execution.
- // MO_GOT,
-
- /// MO_GOT_CALL - Represents the offset into the global offset table at
- /// which the address of a call site relocation entry symbol resides
- /// during execution. This is different from the above since this flag
- /// can only be present in call instructions.
- // MO_GOT_CALL,
-
- /// MO_GPREL - Represents the offset from the current gp value to be used
- /// for the relocatable object file being produced.
- // MO_GPREL,
-
- /// MO_ABS_HILO - Represents the hi or low part of an absolute symbol
- /// address.
- // MO_ABS_HILO
-
- };
-}
-
class MBlazeInstrInfo : public MBlazeGenInstrInfo {
MBlazeTargetMachine &TM;
const MBlazeRegisterInfo RI;
diff --git a/contrib/llvm/lib/Target/MBlaze/MBlazeInstrInfo.td b/contrib/llvm/lib/Target/MBlaze/MBlazeInstrInfo.td
index 950f2d7..1d8c987 100644
--- a/contrib/llvm/lib/Target/MBlaze/MBlazeInstrInfo.td
+++ b/contrib/llvm/lib/Target/MBlaze/MBlazeInstrInfo.td
@@ -442,17 +442,19 @@ let Predicates=[HasMul] in {
//===----------------------------------------------------------------------===//
let canFoldAsLoad = 1, isReMaterializable = 1 in {
- def LBU : LoadM<0x30, 0x000, "lbu ">;
- def LBUR : LoadM<0x30, 0x200, "lbur ">;
+ let neverHasSideEffects = 1 in {
+ def LBU : LoadM<0x30, 0x000, "lbu ">;
+ def LBUR : LoadM<0x30, 0x200, "lbur ">;
- def LHU : LoadM<0x31, 0x000, "lhu ">;
- def LHUR : LoadM<0x31, 0x200, "lhur ">;
+ def LHU : LoadM<0x31, 0x000, "lhu ">;
+ def LHUR : LoadM<0x31, 0x200, "lhur ">;
- def LW : LoadM<0x32, 0x000, "lw ">;
- def LWR : LoadM<0x32, 0x200, "lwr ">;
+ def LW : LoadM<0x32, 0x000, "lw ">;
+ def LWR : LoadM<0x32, 0x200, "lwr ">;
- let Defs = [CARRY] in {
- def LWX : LoadM<0x32, 0x400, "lwx ">;
+ let Defs = [CARRY] in {
+ def LWX : LoadM<0x32, 0x400, "lwx ">;
+ }
}
def LBUI : LoadMI<0x38, "lbui ", zextloadi8>;
@@ -877,6 +879,9 @@ def : Pat<(zextloadi8 xaddr:$addr), (i32 (LBU xaddr:$addr))>;
// Peepholes
def : Pat<(store (i32 0), iaddr:$dst), (SWI (i32 R0), iaddr:$dst)>;
+// Atomic fence
+def : Pat<(atomic_fence (imm), (imm)), (MEMBARRIER)>;
+
//===----------------------------------------------------------------------===//
// Floating Point Support
//===----------------------------------------------------------------------===//
diff --git a/contrib/llvm/lib/Target/MBlaze/MBlazeIntrinsicInfo.cpp b/contrib/llvm/lib/Target/MBlaze/MBlazeIntrinsicInfo.cpp
index 32d67b2..ea81dd6 100644
--- a/contrib/llvm/lib/Target/MBlaze/MBlazeIntrinsicInfo.cpp
+++ b/contrib/llvm/lib/Target/MBlaze/MBlazeIntrinsicInfo.cpp
@@ -37,7 +37,7 @@ namespace mblazeIntrinsic {
#undef GET_LLVM_INTRINSIC_FOR_GCC_BUILTIN
}
-std::string MBlazeIntrinsicInfo::getName(unsigned IntrID, const Type **Tys,
+std::string MBlazeIntrinsicInfo::getName(unsigned IntrID, Type **Tys,
unsigned numTys) const {
static const char *const names[] = {
#define GET_INTRINSIC_NAME_TABLE
@@ -90,8 +90,8 @@ bool MBlazeIntrinsicInfo::isOverloaded(unsigned IntrID) const {
#include "MBlazeGenIntrinsics.inc"
#undef GET_INTRINSIC_ATTRIBUTES
-static const FunctionType *getType(LLVMContext &Context, unsigned id) {
- const Type *ResultTy = NULL;
+static FunctionType *getType(LLVMContext &Context, unsigned id) {
+ Type *ResultTy = NULL;
std::vector<Type*> ArgTys;
bool IsVarArg = false;
@@ -103,7 +103,7 @@ static const FunctionType *getType(LLVMContext &Context, unsigned id) {
}
Function *MBlazeIntrinsicInfo::getDeclaration(Module *M, unsigned IntrID,
- const Type **Tys,
+ Type **Tys,
unsigned numTy) const {
assert(!isOverloaded(IntrID) && "MBlaze intrinsics are not overloaded");
AttrListPtr AList = getAttributes((mblazeIntrinsic::ID) IntrID);
diff --git a/contrib/llvm/lib/Target/MBlaze/MBlazeIntrinsicInfo.h b/contrib/llvm/lib/Target/MBlaze/MBlazeIntrinsicInfo.h
index 9804c77..80760d8 100644
--- a/contrib/llvm/lib/Target/MBlaze/MBlazeIntrinsicInfo.h
+++ b/contrib/llvm/lib/Target/MBlaze/MBlazeIntrinsicInfo.h
@@ -19,12 +19,12 @@ namespace llvm {
class MBlazeIntrinsicInfo : public TargetIntrinsicInfo {
public:
- std::string getName(unsigned IntrID, const Type **Tys = 0,
+ std::string getName(unsigned IntrID, Type **Tys = 0,
unsigned numTys = 0) const;
unsigned lookupName(const char *Name, unsigned Len) const;
unsigned lookupGCCName(const char *Name) const;
bool isOverloaded(unsigned IID) const;
- Function *getDeclaration(Module *M, unsigned ID, const Type **Tys = 0,
+ Function *getDeclaration(Module *M, unsigned ID, Type **Tys = 0,
unsigned numTys = 0) const;
};
diff --git a/contrib/llvm/lib/Target/MBlaze/MBlazeRegisterInfo.cpp b/contrib/llvm/lib/Target/MBlaze/MBlazeRegisterInfo.cpp
index f0b201a..9788ba9 100644
--- a/contrib/llvm/lib/Target/MBlaze/MBlazeRegisterInfo.cpp
+++ b/contrib/llvm/lib/Target/MBlaze/MBlazeRegisterInfo.cpp
@@ -25,7 +25,6 @@
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
-#include "llvm/CodeGen/MachineLocation.h"
#include "llvm/Target/TargetFrameLowering.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetOptions.h"
@@ -44,164 +43,7 @@ using namespace llvm;
MBlazeRegisterInfo::
MBlazeRegisterInfo(const MBlazeSubtarget &ST, const TargetInstrInfo &tii)
- : MBlazeGenRegisterInfo(), Subtarget(ST), TII(tii) {}
-
-/// getRegisterNumbering - Given the enum value for some register, e.g.
-/// MBlaze::R0, return the number that it corresponds to (e.g. 0).
-unsigned MBlazeRegisterInfo::getRegisterNumbering(unsigned RegEnum) {
- switch (RegEnum) {
- case MBlaze::R0 : return 0;
- case MBlaze::R1 : return 1;
- case MBlaze::R2 : return 2;
- case MBlaze::R3 : return 3;
- case MBlaze::R4 : return 4;
- case MBlaze::R5 : return 5;
- case MBlaze::R6 : return 6;
- case MBlaze::R7 : return 7;
- case MBlaze::R8 : return 8;
- case MBlaze::R9 : return 9;
- case MBlaze::R10 : return 10;
- case MBlaze::R11 : return 11;
- case MBlaze::R12 : return 12;
- case MBlaze::R13 : return 13;
- case MBlaze::R14 : return 14;
- case MBlaze::R15 : return 15;
- case MBlaze::R16 : return 16;
- case MBlaze::R17 : return 17;
- case MBlaze::R18 : return 18;
- case MBlaze::R19 : return 19;
- case MBlaze::R20 : return 20;
- case MBlaze::R21 : return 21;
- case MBlaze::R22 : return 22;
- case MBlaze::R23 : return 23;
- case MBlaze::R24 : return 24;
- case MBlaze::R25 : return 25;
- case MBlaze::R26 : return 26;
- case MBlaze::R27 : return 27;
- case MBlaze::R28 : return 28;
- case MBlaze::R29 : return 29;
- case MBlaze::R30 : return 30;
- case MBlaze::R31 : return 31;
- case MBlaze::RPC : return 0x0000;
- case MBlaze::RMSR : return 0x0001;
- case MBlaze::REAR : return 0x0003;
- case MBlaze::RESR : return 0x0005;
- case MBlaze::RFSR : return 0x0007;
- case MBlaze::RBTR : return 0x000B;
- case MBlaze::REDR : return 0x000D;
- case MBlaze::RPID : return 0x1000;
- case MBlaze::RZPR : return 0x1001;
- case MBlaze::RTLBX : return 0x1002;
- case MBlaze::RTLBLO : return 0x1003;
- case MBlaze::RTLBHI : return 0x1004;
- case MBlaze::RPVR0 : return 0x2000;
- case MBlaze::RPVR1 : return 0x2001;
- case MBlaze::RPVR2 : return 0x2002;
- case MBlaze::RPVR3 : return 0x2003;
- case MBlaze::RPVR4 : return 0x2004;
- case MBlaze::RPVR5 : return 0x2005;
- case MBlaze::RPVR6 : return 0x2006;
- case MBlaze::RPVR7 : return 0x2007;
- case MBlaze::RPVR8 : return 0x2008;
- case MBlaze::RPVR9 : return 0x2009;
- case MBlaze::RPVR10 : return 0x200A;
- case MBlaze::RPVR11 : return 0x200B;
- default: llvm_unreachable("Unknown register number!");
- }
- return 0; // Not reached
-}
-
-/// getRegisterFromNumbering - Given the enum value for some register, e.g.
-/// MBlaze::R0, return the number that it corresponds to (e.g. 0).
-unsigned MBlazeRegisterInfo::getRegisterFromNumbering(unsigned Reg) {
- switch (Reg) {
- case 0 : return MBlaze::R0;
- case 1 : return MBlaze::R1;
- case 2 : return MBlaze::R2;
- case 3 : return MBlaze::R3;
- case 4 : return MBlaze::R4;
- case 5 : return MBlaze::R5;
- case 6 : return MBlaze::R6;
- case 7 : return MBlaze::R7;
- case 8 : return MBlaze::R8;
- case 9 : return MBlaze::R9;
- case 10 : return MBlaze::R10;
- case 11 : return MBlaze::R11;
- case 12 : return MBlaze::R12;
- case 13 : return MBlaze::R13;
- case 14 : return MBlaze::R14;
- case 15 : return MBlaze::R15;
- case 16 : return MBlaze::R16;
- case 17 : return MBlaze::R17;
- case 18 : return MBlaze::R18;
- case 19 : return MBlaze::R19;
- case 20 : return MBlaze::R20;
- case 21 : return MBlaze::R21;
- case 22 : return MBlaze::R22;
- case 23 : return MBlaze::R23;
- case 24 : return MBlaze::R24;
- case 25 : return MBlaze::R25;
- case 26 : return MBlaze::R26;
- case 27 : return MBlaze::R27;
- case 28 : return MBlaze::R28;
- case 29 : return MBlaze::R29;
- case 30 : return MBlaze::R30;
- case 31 : return MBlaze::R31;
- default: llvm_unreachable("Unknown register number!");
- }
- return 0; // Not reached
-}
-
-unsigned MBlazeRegisterInfo::getSpecialRegisterFromNumbering(unsigned Reg) {
- switch (Reg) {
- case 0x0000 : return MBlaze::RPC;
- case 0x0001 : return MBlaze::RMSR;
- case 0x0003 : return MBlaze::REAR;
- case 0x0005 : return MBlaze::RESR;
- case 0x0007 : return MBlaze::RFSR;
- case 0x000B : return MBlaze::RBTR;
- case 0x000D : return MBlaze::REDR;
- case 0x1000 : return MBlaze::RPID;
- case 0x1001 : return MBlaze::RZPR;
- case 0x1002 : return MBlaze::RTLBX;
- case 0x1003 : return MBlaze::RTLBLO;
- case 0x1004 : return MBlaze::RTLBHI;
- case 0x2000 : return MBlaze::RPVR0;
- case 0x2001 : return MBlaze::RPVR1;
- case 0x2002 : return MBlaze::RPVR2;
- case 0x2003 : return MBlaze::RPVR3;
- case 0x2004 : return MBlaze::RPVR4;
- case 0x2005 : return MBlaze::RPVR5;
- case 0x2006 : return MBlaze::RPVR6;
- case 0x2007 : return MBlaze::RPVR7;
- case 0x2008 : return MBlaze::RPVR8;
- case 0x2009 : return MBlaze::RPVR9;
- case 0x200A : return MBlaze::RPVR10;
- case 0x200B : return MBlaze::RPVR11;
- default: llvm_unreachable("Unknown register number!");
- }
- return 0; // Not reached
-}
-
-bool MBlazeRegisterInfo::isRegister(unsigned Reg) {
- return Reg <= 31;
-}
-
-bool MBlazeRegisterInfo::isSpecialRegister(unsigned Reg) {
- switch (Reg) {
- case 0x0000 : case 0x0001 : case 0x0003 : case 0x0005 :
- case 0x0007 : case 0x000B : case 0x000D : case 0x1000 :
- case 0x1001 : case 0x1002 : case 0x1003 : case 0x1004 :
- case 0x2000 : case 0x2001 : case 0x2002 : case 0x2003 :
- case 0x2004 : case 0x2005 : case 0x2006 : case 0x2007 :
- case 0x2008 : case 0x2009 : case 0x200A : case 0x200B :
- return true;
-
- default:
- return false;
- }
- return false; // Not reached
-}
+ : MBlazeGenRegisterInfo(MBlaze::R15), Subtarget(ST), TII(tii) {}
unsigned MBlazeRegisterInfo::getPICCallReg() {
return MBlaze::R20;
@@ -334,10 +176,6 @@ processFunctionBeforeFrameFinalized(MachineFunction &MF) const {
MFI->setObjectOffset(MBlazeFI->getGPFI(), MBlazeFI->getGPStackOffset());
}
-unsigned MBlazeRegisterInfo::getRARegister() const {
- return MBlaze::R15;
-}
-
unsigned MBlazeRegisterInfo::getFrameRegister(const MachineFunction &MF) const {
const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering();
@@ -353,11 +191,3 @@ unsigned MBlazeRegisterInfo::getEHHandlerRegister() const {
llvm_unreachable("What is the exception handler register");
return 0;
}
-
-int MBlazeRegisterInfo::getDwarfRegNum(unsigned RegNo, bool isEH) const {
- return MBlazeGenRegisterInfo::getDwarfRegNumFull(RegNo,0);
-}
-
-int MBlazeRegisterInfo::getLLVMRegNum(unsigned DwarfRegNo, bool isEH) const {
- return MBlazeGenRegisterInfo::getLLVMRegNumFull(DwarfRegNo,0);
-}
diff --git a/contrib/llvm/lib/Target/MBlaze/MBlazeRegisterInfo.h b/contrib/llvm/lib/Target/MBlaze/MBlazeRegisterInfo.h
index 7ebce21..7e4b269 100644
--- a/contrib/llvm/lib/Target/MBlaze/MBlazeRegisterInfo.h
+++ b/contrib/llvm/lib/Target/MBlaze/MBlazeRegisterInfo.h
@@ -42,14 +42,6 @@ struct MBlazeRegisterInfo : public MBlazeGenRegisterInfo {
MBlazeRegisterInfo(const MBlazeSubtarget &Subtarget,
const TargetInstrInfo &tii);
- /// getRegisterNumbering - Given the enum value for some register, e.g.
- /// MBlaze::RA, return the number that it corresponds to (e.g. 31).
- static unsigned getRegisterNumbering(unsigned RegEnum);
- static unsigned getRegisterFromNumbering(unsigned RegEnum);
- static unsigned getSpecialRegisterFromNumbering(unsigned RegEnum);
- static bool isRegister(unsigned RegEnum);
- static bool isSpecialRegister(unsigned RegEnum);
-
/// Get PIC indirect call register
static unsigned getPICCallReg();
@@ -69,15 +61,11 @@ struct MBlazeRegisterInfo : public MBlazeGenRegisterInfo {
void processFunctionBeforeFrameFinalized(MachineFunction &MF) const;
/// Debug information queries.
- unsigned getRARegister() const;
unsigned getFrameRegister(const MachineFunction &MF) const;
/// Exception handling queries.
unsigned getEHExceptionRegister() const;
unsigned getEHHandlerRegister() const;
-
- int getDwarfRegNum(unsigned RegNum, bool isEH) const;
- int getLLVMRegNum(unsigned RegNum, bool isEH) const;
};
} // end namespace llvm
diff --git a/contrib/llvm/lib/Target/MBlaze/MBlazeSubtarget.cpp b/contrib/llvm/lib/Target/MBlaze/MBlazeSubtarget.cpp
index eda141d..7e5667f 100644
--- a/contrib/llvm/lib/Target/MBlaze/MBlazeSubtarget.cpp
+++ b/contrib/llvm/lib/Target/MBlaze/MBlazeSubtarget.cpp
@@ -15,7 +15,7 @@
#include "MBlaze.h"
#include "MBlazeRegisterInfo.h"
#include "llvm/Support/CommandLine.h"
-#include "llvm/Target/TargetRegistry.h"
+#include "llvm/Support/TargetRegistry.h"
#define GET_SUBTARGETINFO_TARGET_DESC
#define GET_SUBTARGETINFO_CTOR
diff --git a/contrib/llvm/lib/Target/MBlaze/MBlazeTargetMachine.cpp b/contrib/llvm/lib/Target/MBlaze/MBlazeTargetMachine.cpp
index 7208874..7bff53e 100644
--- a/contrib/llvm/lib/Target/MBlaze/MBlazeTargetMachine.cpp
+++ b/contrib/llvm/lib/Target/MBlaze/MBlazeTargetMachine.cpp
@@ -16,48 +16,13 @@
#include "llvm/PassManager.h"
#include "llvm/CodeGen/Passes.h"
#include "llvm/Support/FormattedStream.h"
+#include "llvm/Support/TargetRegistry.h"
#include "llvm/Target/TargetOptions.h"
-#include "llvm/Target/TargetRegistry.h"
using namespace llvm;
-static MCStreamer *createMCStreamer(const Target &T, const std::string &TT,
- MCContext &Ctx, TargetAsmBackend &TAB,
- raw_ostream &_OS,
- MCCodeEmitter *_Emitter,
- bool RelaxAll,
- bool NoExecStack) {
- Triple TheTriple(TT);
-
- if (TheTriple.isOSDarwin()) {
- llvm_unreachable("MBlaze does not support Darwin MACH-O format");
- return NULL;
- }
-
- if (TheTriple.isOSWindows()) {
- llvm_unreachable("MBlaze does not support Windows COFF format");
- return NULL;
- }
-
- return createELFStreamer(Ctx, TAB, _OS, _Emitter, RelaxAll, NoExecStack);
-}
-
-
extern "C" void LLVMInitializeMBlazeTarget() {
// Register the target.
RegisterTargetMachine<MBlazeTargetMachine> X(TheMBlazeTarget);
-
- // Register the MC code emitter
- TargetRegistry::RegisterCodeEmitter(TheMBlazeTarget,
- llvm::createMBlazeMCCodeEmitter);
-
- // Register the asm backend
- TargetRegistry::RegisterAsmBackend(TheMBlazeTarget,
- createMBlazeAsmBackend);
-
- // Register the object streamer
- TargetRegistry::RegisterObjectStreamer(TheMBlazeTarget,
- createMCStreamer);
-
}
// DataLayout --> Big-endian, 32-bit pointer/ABI/alignment
@@ -67,21 +32,16 @@ extern "C" void LLVMInitializeMBlazeTarget() {
// offset from the stack/frame pointer, using StackGrowsUp enables
// an easier handling.
MBlazeTargetMachine::
-MBlazeTargetMachine(const Target &T, const std::string &TT,
- const std::string &CPU, const std::string &FS):
- LLVMTargetMachine(T, TT, CPU, FS),
+MBlazeTargetMachine(const Target &T, StringRef TT,
+ StringRef CPU, StringRef FS,
+ Reloc::Model RM, CodeModel::Model CM):
+ LLVMTargetMachine(T, TT, CPU, FS, RM, CM),
Subtarget(TT, CPU, FS),
DataLayout("E-p:32:32:32-i8:8:8-i16:16:16"),
InstrInfo(*this),
FrameLowering(Subtarget),
TLInfo(*this), TSInfo(*this), ELFWriterInfo(*this),
InstrItins(Subtarget.getInstrItineraryData()) {
- if (getRelocationModel() == Reloc::Default) {
- setRelocationModel(Reloc::Static);
- }
-
- if (getCodeModel() == CodeModel::Default)
- setCodeModel(CodeModel::Small);
}
// Install an instruction selector pass using
diff --git a/contrib/llvm/lib/Target/MBlaze/MBlazeTargetMachine.h b/contrib/llvm/lib/Target/MBlaze/MBlazeTargetMachine.h
index cd6caaf..c1bc08a 100644
--- a/contrib/llvm/lib/Target/MBlaze/MBlazeTargetMachine.h
+++ b/contrib/llvm/lib/Target/MBlaze/MBlazeTargetMachine.h
@@ -41,8 +41,9 @@ namespace llvm {
InstrItineraryData InstrItins;
public:
- MBlazeTargetMachine(const Target &T, const std::string &TT,
- const std::string &CPU, const std::string &FS);
+ MBlazeTargetMachine(const Target &T, StringRef TT,
+ StringRef CPU, StringRef FS,
+ Reloc::Model RM, CodeModel::Model CM);
virtual const MBlazeInstrInfo *getInstrInfo() const
{ return &InstrInfo; }
diff --git a/contrib/llvm/lib/Target/MBlaze/MBlazeTargetObjectFile.cpp b/contrib/llvm/lib/Target/MBlaze/MBlazeTargetObjectFile.cpp
index abd1b0b..f66ea30 100644
--- a/contrib/llvm/lib/Target/MBlaze/MBlazeTargetObjectFile.cpp
+++ b/contrib/llvm/lib/Target/MBlaze/MBlazeTargetObjectFile.cpp
@@ -69,7 +69,7 @@ IsGlobalInSmallSection(const GlobalValue *GV, const TargetMachine &TM,
if (Kind.isMergeable1ByteCString())
return false;
- const Type *Ty = GV->getType()->getElementType();
+ Type *Ty = GV->getType()->getElementType();
return IsInSmallSection(TM.getTargetData()->getTypeAllocSize(Ty));
}
diff --git a/contrib/llvm/lib/Target/MBlaze/MCTargetDesc/CMakeLists.txt b/contrib/llvm/lib/Target/MBlaze/MCTargetDesc/CMakeLists.txt
deleted file mode 100644
index 3d15708..0000000
--- a/contrib/llvm/lib/Target/MBlaze/MCTargetDesc/CMakeLists.txt
+++ /dev/null
@@ -1,4 +0,0 @@
-add_llvm_library(LLVMMBlazeDesc
- MBlazeMCTargetDesc.cpp
- MBlazeMCAsmInfo.cpp
- )
diff --git a/contrib/llvm/lib/Target/MBlaze/MBlazeAsmBackend.cpp b/contrib/llvm/lib/Target/MBlaze/MCTargetDesc/MBlazeAsmBackend.cpp
index 08f14c3..08f7d46a 100644
--- a/contrib/llvm/lib/Target/MBlaze/MBlazeAsmBackend.cpp
+++ b/contrib/llvm/lib/Target/MBlaze/MCTargetDesc/MBlazeAsmBackend.cpp
@@ -7,10 +7,8 @@
//
//===----------------------------------------------------------------------===//
-#include "llvm/Target/TargetAsmBackend.h"
-#include "MBlaze.h"
-#include "MBlazeELFWriterInfo.h"
-#include "llvm/ADT/Twine.h"
+#include "MCTargetDesc/MBlazeMCTargetDesc.h"
+#include "llvm/MC/MCAsmBackend.h"
#include "llvm/MC/MCAssembler.h"
#include "llvm/MC/MCAsmLayout.h"
#include "llvm/MC/MCELFObjectWriter.h"
@@ -20,11 +18,11 @@
#include "llvm/MC/MCSectionELF.h"
#include "llvm/MC/MCSectionMachO.h"
#include "llvm/MC/MCValue.h"
+#include "llvm/ADT/Twine.h"
#include "llvm/Support/ELF.h"
#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/TargetRegistry.h"
#include "llvm/Support/raw_ostream.h"
-#include "llvm/Target/TargetRegistry.h"
-#include "llvm/Target/TargetAsmBackend.h"
using namespace llvm;
static unsigned getFixupKindSize(unsigned Kind) {
@@ -48,10 +46,10 @@ public:
/*HasRelocationAddend*/ true) {}
};
-class MBlazeAsmBackend : public TargetAsmBackend {
+class MBlazeAsmBackend : public MCAsmBackend {
public:
MBlazeAsmBackend(const Target &T)
- : TargetAsmBackend() {
+ : MCAsmBackend() {
}
unsigned getNumFixupKinds() const {
@@ -148,8 +146,7 @@ void ELFMBlazeAsmBackend::ApplyFixup(const MCFixup &Fixup, char *Data,
}
} // end anonymous namespace
-TargetAsmBackend *llvm::createMBlazeAsmBackend(const Target &T,
- const std::string &TT) {
+MCAsmBackend *llvm::createMBlazeAsmBackend(const Target &T, StringRef TT) {
Triple TheTriple(TT);
if (TheTriple.isOSDarwin())
diff --git a/contrib/llvm/lib/Target/MBlaze/MCTargetDesc/MBlazeBaseInfo.h b/contrib/llvm/lib/Target/MBlaze/MCTargetDesc/MBlazeBaseInfo.h
new file mode 100644
index 0000000..776dbc4
--- /dev/null
+++ b/contrib/llvm/lib/Target/MBlaze/MCTargetDesc/MBlazeBaseInfo.h
@@ -0,0 +1,240 @@
+//===-- MBlazeBaseInfo.h - Top level definitions for MBlaze -- --*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains small standalone helper functions and enum definitions for
+// the MBlaze target useful for the compiler back-end and the MC libraries.
+// As such, it deliberately does not include references to LLVM core
+// code gen types, passes, etc..
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef MBlazeBASEINFO_H
+#define MBlazeBASEINFO_H
+
+#include "MBlazeMCTargetDesc.h"
+#include "llvm/Support/ErrorHandling.h"
+
+namespace llvm {
+
+/// MBlazeII - This namespace holds all of the target specific flags that
+/// instruction info tracks.
+///
+namespace MBlazeII {
+ enum {
+ // PseudoFrm - This represents an instruction that is a pseudo instruction
+ // or one that has not been implemented yet. It is illegal to code generate
+ // it, but tolerated for intermediate implementation stages.
+ FPseudo = 0,
+ FRRR,
+ FRRI,
+ FCRR,
+ FCRI,
+ FRCR,
+ FRCI,
+ FCCR,
+ FCCI,
+ FRRCI,
+ FRRC,
+ FRCX,
+ FRCS,
+ FCRCS,
+ FCRCX,
+ FCX,
+ FCR,
+ FRIR,
+ FRRRR,
+ FRI,
+ FC,
+ FormMask = 63
+
+ //===------------------------------------------------------------------===//
+ // MBlaze Specific MachineOperand flags.
+ // MO_NO_FLAG,
+
+ /// MO_GOT - Represents the offset into the global offset table at which
+ /// the address the relocation entry symbol resides during execution.
+ // MO_GOT,
+
+ /// MO_GOT_CALL - Represents the offset into the global offset table at
+ /// which the address of a call site relocation entry symbol resides
+ /// during execution. This is different from the above since this flag
+ /// can only be present in call instructions.
+ // MO_GOT_CALL,
+
+ /// MO_GPREL - Represents the offset from the current gp value to be used
+ /// for the relocatable object file being produced.
+ // MO_GPREL,
+
+ /// MO_ABS_HILO - Represents the hi or low part of an absolute symbol
+ /// address.
+ // MO_ABS_HILO
+
+ };
+}
+
+static inline bool isMBlazeRegister(unsigned Reg) {
+ return Reg <= 31;
+}
+
+static inline bool isSpecialMBlazeRegister(unsigned Reg) {
+ switch (Reg) {
+ case 0x0000 : case 0x0001 : case 0x0003 : case 0x0005 :
+ case 0x0007 : case 0x000B : case 0x000D : case 0x1000 :
+ case 0x1001 : case 0x1002 : case 0x1003 : case 0x1004 :
+ case 0x2000 : case 0x2001 : case 0x2002 : case 0x2003 :
+ case 0x2004 : case 0x2005 : case 0x2006 : case 0x2007 :
+ case 0x2008 : case 0x2009 : case 0x200A : case 0x200B :
+ return true;
+
+ default:
+ return false;
+ }
+ return false; // Not reached
+}
+
+/// getMBlazeRegisterNumbering - Given the enum value for some register, e.g.
+/// MBlaze::R0, return the number that it corresponds to (e.g. 0).
+static inline unsigned getMBlazeRegisterNumbering(unsigned RegEnum) {
+ switch (RegEnum) {
+ case MBlaze::R0 : return 0;
+ case MBlaze::R1 : return 1;
+ case MBlaze::R2 : return 2;
+ case MBlaze::R3 : return 3;
+ case MBlaze::R4 : return 4;
+ case MBlaze::R5 : return 5;
+ case MBlaze::R6 : return 6;
+ case MBlaze::R7 : return 7;
+ case MBlaze::R8 : return 8;
+ case MBlaze::R9 : return 9;
+ case MBlaze::R10 : return 10;
+ case MBlaze::R11 : return 11;
+ case MBlaze::R12 : return 12;
+ case MBlaze::R13 : return 13;
+ case MBlaze::R14 : return 14;
+ case MBlaze::R15 : return 15;
+ case MBlaze::R16 : return 16;
+ case MBlaze::R17 : return 17;
+ case MBlaze::R18 : return 18;
+ case MBlaze::R19 : return 19;
+ case MBlaze::R20 : return 20;
+ case MBlaze::R21 : return 21;
+ case MBlaze::R22 : return 22;
+ case MBlaze::R23 : return 23;
+ case MBlaze::R24 : return 24;
+ case MBlaze::R25 : return 25;
+ case MBlaze::R26 : return 26;
+ case MBlaze::R27 : return 27;
+ case MBlaze::R28 : return 28;
+ case MBlaze::R29 : return 29;
+ case MBlaze::R30 : return 30;
+ case MBlaze::R31 : return 31;
+ case MBlaze::RPC : return 0x0000;
+ case MBlaze::RMSR : return 0x0001;
+ case MBlaze::REAR : return 0x0003;
+ case MBlaze::RESR : return 0x0005;
+ case MBlaze::RFSR : return 0x0007;
+ case MBlaze::RBTR : return 0x000B;
+ case MBlaze::REDR : return 0x000D;
+ case MBlaze::RPID : return 0x1000;
+ case MBlaze::RZPR : return 0x1001;
+ case MBlaze::RTLBX : return 0x1002;
+ case MBlaze::RTLBLO : return 0x1003;
+ case MBlaze::RTLBHI : return 0x1004;
+ case MBlaze::RPVR0 : return 0x2000;
+ case MBlaze::RPVR1 : return 0x2001;
+ case MBlaze::RPVR2 : return 0x2002;
+ case MBlaze::RPVR3 : return 0x2003;
+ case MBlaze::RPVR4 : return 0x2004;
+ case MBlaze::RPVR5 : return 0x2005;
+ case MBlaze::RPVR6 : return 0x2006;
+ case MBlaze::RPVR7 : return 0x2007;
+ case MBlaze::RPVR8 : return 0x2008;
+ case MBlaze::RPVR9 : return 0x2009;
+ case MBlaze::RPVR10 : return 0x200A;
+ case MBlaze::RPVR11 : return 0x200B;
+ default: llvm_unreachable("Unknown register number!");
+ }
+ return 0; // Not reached
+}
+
+/// getRegisterFromNumbering - Given the enum value for some register, e.g.
+/// MBlaze::R0, return the number that it corresponds to (e.g. 0).
+static inline unsigned getMBlazeRegisterFromNumbering(unsigned Reg) {
+ switch (Reg) {
+ case 0 : return MBlaze::R0;
+ case 1 : return MBlaze::R1;
+ case 2 : return MBlaze::R2;
+ case 3 : return MBlaze::R3;
+ case 4 : return MBlaze::R4;
+ case 5 : return MBlaze::R5;
+ case 6 : return MBlaze::R6;
+ case 7 : return MBlaze::R7;
+ case 8 : return MBlaze::R8;
+ case 9 : return MBlaze::R9;
+ case 10 : return MBlaze::R10;
+ case 11 : return MBlaze::R11;
+ case 12 : return MBlaze::R12;
+ case 13 : return MBlaze::R13;
+ case 14 : return MBlaze::R14;
+ case 15 : return MBlaze::R15;
+ case 16 : return MBlaze::R16;
+ case 17 : return MBlaze::R17;
+ case 18 : return MBlaze::R18;
+ case 19 : return MBlaze::R19;
+ case 20 : return MBlaze::R20;
+ case 21 : return MBlaze::R21;
+ case 22 : return MBlaze::R22;
+ case 23 : return MBlaze::R23;
+ case 24 : return MBlaze::R24;
+ case 25 : return MBlaze::R25;
+ case 26 : return MBlaze::R26;
+ case 27 : return MBlaze::R27;
+ case 28 : return MBlaze::R28;
+ case 29 : return MBlaze::R29;
+ case 30 : return MBlaze::R30;
+ case 31 : return MBlaze::R31;
+ default: llvm_unreachable("Unknown register number!");
+ }
+ return 0; // Not reached
+}
+
+static inline unsigned getSpecialMBlazeRegisterFromNumbering(unsigned Reg) {
+ switch (Reg) {
+ case 0x0000 : return MBlaze::RPC;
+ case 0x0001 : return MBlaze::RMSR;
+ case 0x0003 : return MBlaze::REAR;
+ case 0x0005 : return MBlaze::RESR;
+ case 0x0007 : return MBlaze::RFSR;
+ case 0x000B : return MBlaze::RBTR;
+ case 0x000D : return MBlaze::REDR;
+ case 0x1000 : return MBlaze::RPID;
+ case 0x1001 : return MBlaze::RZPR;
+ case 0x1002 : return MBlaze::RTLBX;
+ case 0x1003 : return MBlaze::RTLBLO;
+ case 0x1004 : return MBlaze::RTLBHI;
+ case 0x2000 : return MBlaze::RPVR0;
+ case 0x2001 : return MBlaze::RPVR1;
+ case 0x2002 : return MBlaze::RPVR2;
+ case 0x2003 : return MBlaze::RPVR3;
+ case 0x2004 : return MBlaze::RPVR4;
+ case 0x2005 : return MBlaze::RPVR5;
+ case 0x2006 : return MBlaze::RPVR6;
+ case 0x2007 : return MBlaze::RPVR7;
+ case 0x2008 : return MBlaze::RPVR8;
+ case 0x2009 : return MBlaze::RPVR9;
+ case 0x200A : return MBlaze::RPVR10;
+ case 0x200B : return MBlaze::RPVR11;
+ default: llvm_unreachable("Unknown register number!");
+ }
+ return 0; // Not reached
+}
+
+} // end namespace llvm;
+
+#endif
diff --git a/contrib/llvm/lib/Target/MBlaze/MBlazeMCCodeEmitter.cpp b/contrib/llvm/lib/Target/MBlaze/MCTargetDesc/MBlazeMCCodeEmitter.cpp
index ddc636d..1514557 100644
--- a/contrib/llvm/lib/Target/MBlaze/MBlazeMCCodeEmitter.cpp
+++ b/contrib/llvm/lib/Target/MBlaze/MCTargetDesc/MBlazeMCCodeEmitter.cpp
@@ -12,11 +12,13 @@
//===----------------------------------------------------------------------===//
#define DEBUG_TYPE "mccodeemitter"
-#include "MBlaze.h"
-#include "MBlazeInstrInfo.h"
+#include "MCTargetDesc/MBlazeBaseInfo.h"
+#include "MCTargetDesc/MBlazeMCTargetDesc.h"
#include "llvm/MC/MCCodeEmitter.h"
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCInst.h"
+#include "llvm/MC/MCInstrInfo.h"
+#include "llvm/MC/MCSubtargetInfo.h"
#include "llvm/MC/MCSymbol.h"
#include "llvm/MC/MCFixup.h"
#include "llvm/ADT/Statistic.h"
@@ -106,7 +108,7 @@ MCCodeEmitter *llvm::createMBlazeMCCodeEmitter(const MCInstrInfo &MCII,
unsigned MBlazeMCCodeEmitter::getMachineOpValue(const MCInst &MI,
const MCOperand &MO) const {
if (MO.isReg())
- return MBlazeRegisterInfo::getRegisterNumbering(MO.getReg());
+ return getMBlazeRegisterNumbering(MO.getReg());
else if (MO.isImm())
return static_cast<unsigned>(MO.getImm());
else if (MO.isExpr())
diff --git a/contrib/llvm/lib/Target/MBlaze/MCTargetDesc/MBlazeMCTargetDesc.cpp b/contrib/llvm/lib/Target/MBlaze/MCTargetDesc/MBlazeMCTargetDesc.cpp
index 20d6c0b..43ae281 100644
--- a/contrib/llvm/lib/Target/MBlaze/MCTargetDesc/MBlazeMCTargetDesc.cpp
+++ b/contrib/llvm/lib/Target/MBlaze/MCTargetDesc/MBlazeMCTargetDesc.cpp
@@ -13,10 +13,14 @@
#include "MBlazeMCTargetDesc.h"
#include "MBlazeMCAsmInfo.h"
+#include "InstPrinter/MBlazeInstPrinter.h"
+#include "llvm/MC/MCCodeGenInfo.h"
#include "llvm/MC/MCInstrInfo.h"
#include "llvm/MC/MCRegisterInfo.h"
+#include "llvm/MC/MCStreamer.h"
#include "llvm/MC/MCSubtargetInfo.h"
-#include "llvm/Target/TargetRegistry.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/TargetRegistry.h"
#define GET_INSTRINFO_MC_DESC
#include "MBlazeGenInstrInfo.inc"
@@ -36,8 +40,10 @@ static MCInstrInfo *createMBlazeMCInstrInfo() {
return X;
}
-extern "C" void LLVMInitializeMBlazeMCInstrInfo() {
- TargetRegistry::RegisterMCInstrInfo(TheMBlazeTarget, createMBlazeMCInstrInfo);
+static MCRegisterInfo *createMBlazeMCRegisterInfo(StringRef TT) {
+ MCRegisterInfo *X = new MCRegisterInfo();
+ InitMBlazeMCRegisterInfo(X, MBlaze::R15);
+ return X;
}
static MCSubtargetInfo *createMBlazeMCSubtargetInfo(StringRef TT, StringRef CPU,
@@ -47,11 +53,6 @@ static MCSubtargetInfo *createMBlazeMCSubtargetInfo(StringRef TT, StringRef CPU,
return X;
}
-extern "C" void LLVMInitializeMBlazeMCSubtargetInfo() {
- TargetRegistry::RegisterMCSubtargetInfo(TheMBlazeTarget,
- createMBlazeMCSubtargetInfo);
-}
-
static MCAsmInfo *createMCAsmInfo(const Target &T, StringRef TT) {
Triple TheTriple(TT);
switch (TheTriple.getOS()) {
@@ -60,6 +61,80 @@ static MCAsmInfo *createMCAsmInfo(const Target &T, StringRef TT) {
}
}
-extern "C" void LLVMInitializeMBlazeMCAsmInfo() {
+static MCCodeGenInfo *createMBlazeMCCodeGenInfo(StringRef TT, Reloc::Model RM,
+ CodeModel::Model CM) {
+ MCCodeGenInfo *X = new MCCodeGenInfo();
+ if (RM == Reloc::Default)
+ RM = Reloc::Static;
+ if (CM == CodeModel::Default)
+ CM = CodeModel::Small;
+ X->InitMCCodeGenInfo(RM, CM);
+ return X;
+}
+
+static MCStreamer *createMCStreamer(const Target &T, StringRef TT,
+ MCContext &Ctx, MCAsmBackend &MAB,
+ raw_ostream &_OS,
+ MCCodeEmitter *_Emitter,
+ bool RelaxAll,
+ bool NoExecStack) {
+ Triple TheTriple(TT);
+
+ if (TheTriple.isOSDarwin()) {
+ llvm_unreachable("MBlaze does not support Darwin MACH-O format");
+ return NULL;
+ }
+
+ if (TheTriple.isOSWindows()) {
+ llvm_unreachable("MBlaze does not support Windows COFF format");
+ return NULL;
+ }
+
+ return createELFStreamer(Ctx, MAB, _OS, _Emitter, RelaxAll, NoExecStack);
+}
+
+static MCInstPrinter *createMBlazeMCInstPrinter(const Target &T,
+ unsigned SyntaxVariant,
+ const MCAsmInfo &MAI,
+ const MCSubtargetInfo &STI) {
+ if (SyntaxVariant == 0)
+ return new MBlazeInstPrinter(MAI);
+ return 0;
+}
+
+// Force static initialization.
+extern "C" void LLVMInitializeMBlazeTargetMC() {
+ // Register the MC asm info.
RegisterMCAsmInfoFn X(TheMBlazeTarget, createMCAsmInfo);
+
+ // Register the MC codegen info.
+ TargetRegistry::RegisterMCCodeGenInfo(TheMBlazeTarget,
+ createMBlazeMCCodeGenInfo);
+
+ // Register the MC instruction info.
+ TargetRegistry::RegisterMCInstrInfo(TheMBlazeTarget, createMBlazeMCInstrInfo);
+
+ // Register the MC register info.
+ TargetRegistry::RegisterMCRegInfo(TheMBlazeTarget,
+ createMBlazeMCRegisterInfo);
+
+ // Register the MC subtarget info.
+ TargetRegistry::RegisterMCSubtargetInfo(TheMBlazeTarget,
+ createMBlazeMCSubtargetInfo);
+
+ // Register the MC code emitter
+ TargetRegistry::RegisterMCCodeEmitter(TheMBlazeTarget,
+ llvm::createMBlazeMCCodeEmitter);
+
+ // Register the asm backend
+ TargetRegistry::RegisterMCAsmBackend(TheMBlazeTarget,
+ createMBlazeAsmBackend);
+
+ // Register the object streamer
+ TargetRegistry::RegisterMCObjectStreamer(TheMBlazeTarget,
+ createMCStreamer);
+
+ // Register the MCInstPrinter.
+ TargetRegistry::RegisterMCInstPrinter(TheMBlazeTarget,
+ createMBlazeMCInstPrinter);
}
diff --git a/contrib/llvm/lib/Target/MBlaze/MCTargetDesc/MBlazeMCTargetDesc.h b/contrib/llvm/lib/Target/MBlaze/MCTargetDesc/MBlazeMCTargetDesc.h
index b14772e..deff5cb 100644
--- a/contrib/llvm/lib/Target/MBlaze/MCTargetDesc/MBlazeMCTargetDesc.h
+++ b/contrib/llvm/lib/Target/MBlaze/MCTargetDesc/MBlazeMCTargetDesc.h
@@ -15,12 +15,23 @@
#define MBLAZEMCTARGETDESC_H
namespace llvm {
+class MCAsmBackend;
+class MCContext;
+class MCCodeEmitter;
+class MCInstrInfo;
class MCSubtargetInfo;
class Target;
class StringRef;
+class formatted_raw_ostream;
extern Target TheMBlazeTarget;
+MCCodeEmitter *createMBlazeMCCodeEmitter(const MCInstrInfo &MCII,
+ const MCSubtargetInfo &STI,
+ MCContext &Ctx);
+
+MCAsmBackend *createMBlazeAsmBackend(const Target &T, StringRef TT);
+
} // End llvm namespace
// Defines symbolic names for MBlaze registers. This defines a mapping from
diff --git a/contrib/llvm/lib/Target/MBlaze/MCTargetDesc/Makefile b/contrib/llvm/lib/Target/MBlaze/MCTargetDesc/Makefile
deleted file mode 100644
index 71075ff..0000000
--- a/contrib/llvm/lib/Target/MBlaze/MCTargetDesc/Makefile
+++ /dev/null
@@ -1,16 +0,0 @@
-##===- lib/Target/MBlaze/TargetDesc/Makefile ---------------*- Makefile -*-===##
-#
-# The LLVM Compiler Infrastructure
-#
-# This file is distributed under the University of Illinois Open Source
-# License. See LICENSE.TXT for details.
-#
-##===----------------------------------------------------------------------===##
-
-LEVEL = ../../../..
-LIBRARYNAME = LLVMMBlazeDesc
-
-# Hack: we need to include 'main' target directory to grab private headers
-CPP.Flags += -I$(PROJ_OBJ_DIR)/.. -I$(PROJ_SRC_DIR)/..
-
-include $(LEVEL)/Makefile.common
diff --git a/contrib/llvm/lib/Target/MBlaze/TargetInfo/MBlazeTargetInfo.cpp b/contrib/llvm/lib/Target/MBlaze/TargetInfo/MBlazeTargetInfo.cpp
index 16e01db..71210d8 100644
--- a/contrib/llvm/lib/Target/MBlaze/TargetInfo/MBlazeTargetInfo.cpp
+++ b/contrib/llvm/lib/Target/MBlaze/TargetInfo/MBlazeTargetInfo.cpp
@@ -9,7 +9,7 @@
#include "MBlaze.h"
#include "llvm/Module.h"
-#include "llvm/Target/TargetRegistry.h"
+#include "llvm/Support/TargetRegistry.h"
using namespace llvm;
Target llvm::TheMBlazeTarget;
diff --git a/contrib/llvm/lib/Target/MSP430/InstPrinter/CMakeLists.txt b/contrib/llvm/lib/Target/MSP430/InstPrinter/CMakeLists.txt
deleted file mode 100644
index f5458d5..0000000
--- a/contrib/llvm/lib/Target/MSP430/InstPrinter/CMakeLists.txt
+++ /dev/null
@@ -1,6 +0,0 @@
-include_directories( ${CMAKE_CURRENT_BINARY_DIR}/.. ${CMAKE_CURRENT_SOURCE_DIR}/.. )
-
-add_llvm_library(LLVMMSP430AsmPrinter
- MSP430InstPrinter.cpp
- )
-add_dependencies(LLVMMSP430AsmPrinter MSP430CodeGenTable_gen)
diff --git a/contrib/llvm/lib/Target/MSP430/InstPrinter/MSP430InstPrinter.cpp b/contrib/llvm/lib/Target/MSP430/InstPrinter/MSP430InstPrinter.cpp
index e10d4fe..5d6c6ad 100644
--- a/contrib/llvm/lib/Target/MSP430/InstPrinter/MSP430InstPrinter.cpp
+++ b/contrib/llvm/lib/Target/MSP430/InstPrinter/MSP430InstPrinter.cpp
@@ -25,8 +25,10 @@ using namespace llvm;
// Include the auto-generated portion of the assembly writer.
#include "MSP430GenAsmWriter.inc"
-void MSP430InstPrinter::printInst(const MCInst *MI, raw_ostream &O) {
+void MSP430InstPrinter::printInst(const MCInst *MI, raw_ostream &O,
+ StringRef Annot) {
printInstruction(MI, O);
+ printAnnotation(O, Annot);
}
void MSP430InstPrinter::printPCRelImmOperand(const MCInst *MI, unsigned OpNo,
diff --git a/contrib/llvm/lib/Target/MSP430/InstPrinter/MSP430InstPrinter.h b/contrib/llvm/lib/Target/MSP430/InstPrinter/MSP430InstPrinter.h
index 50d98b7..a1984a8 100644
--- a/contrib/llvm/lib/Target/MSP430/InstPrinter/MSP430InstPrinter.h
+++ b/contrib/llvm/lib/Target/MSP430/InstPrinter/MSP430InstPrinter.h
@@ -22,9 +22,9 @@ namespace llvm {
class MSP430InstPrinter : public MCInstPrinter {
public:
MSP430InstPrinter(const MCAsmInfo &MAI)
- : MCInstPrinter(MAI) {}
+ : MCInstPrinter(MAI) {}
- virtual void printInst(const MCInst *MI, raw_ostream &O);
+ virtual void printInst(const MCInst *MI, raw_ostream &O, StringRef Annot);
// Autogenerated by tblgen.
void printInstruction(const MCInst *MI, raw_ostream &O);
diff --git a/contrib/llvm/lib/Target/MSP430/InstPrinter/Makefile b/contrib/llvm/lib/Target/MSP430/InstPrinter/Makefile
deleted file mode 100644
index a5293ab..0000000
--- a/contrib/llvm/lib/Target/MSP430/InstPrinter/Makefile
+++ /dev/null
@@ -1,15 +0,0 @@
-##===- lib/Target/MSP430/AsmPrinter/Makefile ---------------*- Makefile -*-===##
-#
-# The LLVM Compiler Infrastructure
-#
-# This file is distributed under the University of Illinois Open Source
-# License. See LICENSE.TXT for details.
-#
-##===----------------------------------------------------------------------===##
-LEVEL = ../../../..
-LIBRARYNAME = LLVMMSP430AsmPrinter
-
-# Hack: we need to include 'main' MSP430 target directory to grab private headers
-CPP.Flags += -I$(PROJ_OBJ_DIR)/.. -I$(PROJ_SRC_DIR)/..
-
-include $(LEVEL)/Makefile.common
diff --git a/contrib/llvm/lib/Target/MSP430/MCTargetDesc/CMakeLists.txt b/contrib/llvm/lib/Target/MSP430/MCTargetDesc/CMakeLists.txt
deleted file mode 100644
index 0f3ebd3..0000000
--- a/contrib/llvm/lib/Target/MSP430/MCTargetDesc/CMakeLists.txt
+++ /dev/null
@@ -1,4 +0,0 @@
-add_llvm_library(LLVMMSP430Desc
- MSP430MCTargetDesc.cpp
- MSP430MCAsmInfo.cpp
- )
diff --git a/contrib/llvm/lib/Target/MSP430/MCTargetDesc/MSP430MCTargetDesc.cpp b/contrib/llvm/lib/Target/MSP430/MCTargetDesc/MSP430MCTargetDesc.cpp
index 43a704d..fda70b8 100644
--- a/contrib/llvm/lib/Target/MSP430/MCTargetDesc/MSP430MCTargetDesc.cpp
+++ b/contrib/llvm/lib/Target/MSP430/MCTargetDesc/MSP430MCTargetDesc.cpp
@@ -13,10 +13,12 @@
#include "MSP430MCTargetDesc.h"
#include "MSP430MCAsmInfo.h"
+#include "InstPrinter/MSP430InstPrinter.h"
+#include "llvm/MC/MCCodeGenInfo.h"
#include "llvm/MC/MCInstrInfo.h"
#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/MC/MCSubtargetInfo.h"
-#include "llvm/Target/TargetRegistry.h"
+#include "llvm/Support/TargetRegistry.h"
#define GET_INSTRINFO_MC_DESC
#include "MSP430GenInstrInfo.inc"
@@ -29,18 +31,18 @@
using namespace llvm;
-
static MCInstrInfo *createMSP430MCInstrInfo() {
MCInstrInfo *X = new MCInstrInfo();
InitMSP430MCInstrInfo(X);
return X;
}
-extern "C" void LLVMInitializeMSP430MCInstrInfo() {
- TargetRegistry::RegisterMCInstrInfo(TheMSP430Target, createMSP430MCInstrInfo);
+static MCRegisterInfo *createMSP430MCRegisterInfo(StringRef TT) {
+ MCRegisterInfo *X = new MCRegisterInfo();
+ InitMSP430MCRegisterInfo(X, MSP430::PCW);
+ return X;
}
-
static MCSubtargetInfo *createMSP430MCSubtargetInfo(StringRef TT, StringRef CPU,
StringRef FS) {
MCSubtargetInfo *X = new MCSubtargetInfo();
@@ -48,11 +50,42 @@ static MCSubtargetInfo *createMSP430MCSubtargetInfo(StringRef TT, StringRef CPU,
return X;
}
-extern "C" void LLVMInitializeMSP430MCSubtargetInfo() {
- TargetRegistry::RegisterMCSubtargetInfo(TheMSP430Target,
- createMSP430MCSubtargetInfo);
+static MCCodeGenInfo *createMSP430MCCodeGenInfo(StringRef TT, Reloc::Model RM,
+ CodeModel::Model CM) {
+ MCCodeGenInfo *X = new MCCodeGenInfo();
+ X->InitMCCodeGenInfo(RM, CM);
+ return X;
+}
+
+static MCInstPrinter *createMSP430MCInstPrinter(const Target &T,
+ unsigned SyntaxVariant,
+ const MCAsmInfo &MAI,
+ const MCSubtargetInfo &STI) {
+ if (SyntaxVariant == 0)
+ return new MSP430InstPrinter(MAI);
+ return 0;
}
-extern "C" void LLVMInitializeMSP430MCAsmInfo() {
+extern "C" void LLVMInitializeMSP430TargetMC() {
+ // Register the MC asm info.
RegisterMCAsmInfo<MSP430MCAsmInfo> X(TheMSP430Target);
+
+ // Register the MC codegen info.
+ TargetRegistry::RegisterMCCodeGenInfo(TheMSP430Target,
+ createMSP430MCCodeGenInfo);
+
+ // Register the MC instruction info.
+ TargetRegistry::RegisterMCInstrInfo(TheMSP430Target, createMSP430MCInstrInfo);
+
+ // Register the MC register info.
+ TargetRegistry::RegisterMCRegInfo(TheMSP430Target,
+ createMSP430MCRegisterInfo);
+
+ // Register the MC subtarget info.
+ TargetRegistry::RegisterMCSubtargetInfo(TheMSP430Target,
+ createMSP430MCSubtargetInfo);
+
+ // Register the MCInstPrinter.
+ TargetRegistry::RegisterMCInstPrinter(TheMSP430Target,
+ createMSP430MCInstPrinter);
}
diff --git a/contrib/llvm/lib/Target/MSP430/MCTargetDesc/MSP430MCTargetDesc.h b/contrib/llvm/lib/Target/MSP430/MCTargetDesc/MSP430MCTargetDesc.h
index 0d8a6bd..35f2590 100644
--- a/contrib/llvm/lib/Target/MSP430/MCTargetDesc/MSP430MCTargetDesc.h
+++ b/contrib/llvm/lib/Target/MSP430/MCTargetDesc/MSP430MCTargetDesc.h
@@ -11,8 +11,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef ALPHAMCTARGETDESC_H
-#define ALPHAMCTARGETDESC_H
+#ifndef MSP430MCTARGETDESC_H
+#define MSP430MCTARGETDESC_H
namespace llvm {
class MCSubtargetInfo;
diff --git a/contrib/llvm/lib/Target/MSP430/MCTargetDesc/Makefile b/contrib/llvm/lib/Target/MSP430/MCTargetDesc/Makefile
deleted file mode 100644
index bb85799..0000000
--- a/contrib/llvm/lib/Target/MSP430/MCTargetDesc/Makefile
+++ /dev/null
@@ -1,16 +0,0 @@
-##===- lib/Target/MSP430/TargetDesc/Makefile ---------------*- Makefile -*-===##
-#
-# The LLVM Compiler Infrastructure
-#
-# This file is distributed under the University of Illinois Open Source
-# License. See LICENSE.TXT for details.
-#
-##===----------------------------------------------------------------------===##
-
-LEVEL = ../../../..
-LIBRARYNAME = LLVMMSP430Desc
-
-# Hack: we need to include 'main' target directory to grab private headers
-CPP.Flags += -I$(PROJ_OBJ_DIR)/.. -I$(PROJ_SRC_DIR)/..
-
-include $(LEVEL)/Makefile.common
diff --git a/contrib/llvm/lib/Target/MSP430/MSP430AsmPrinter.cpp b/contrib/llvm/lib/Target/MSP430/MSP430AsmPrinter.cpp
index 2042056..8836549 100644
--- a/contrib/llvm/lib/Target/MSP430/MSP430AsmPrinter.cpp
+++ b/contrib/llvm/lib/Target/MSP430/MSP430AsmPrinter.cpp
@@ -32,9 +32,7 @@
#include "llvm/MC/MCStreamer.h"
#include "llvm/MC/MCSymbol.h"
#include "llvm/Target/Mangler.h"
-#include "llvm/Target/TargetData.h"
-#include "llvm/Target/TargetLoweringObjectFile.h"
-#include "llvm/Target/TargetRegistry.h"
+#include "llvm/Support/TargetRegistry.h"
#include "llvm/Support/raw_ostream.h"
using namespace llvm;
@@ -163,17 +161,7 @@ void MSP430AsmPrinter::EmitInstruction(const MachineInstr *MI) {
OutStreamer.EmitInstruction(TmpInst);
}
-static MCInstPrinter *createMSP430MCInstPrinter(const Target &T,
- unsigned SyntaxVariant,
- const MCAsmInfo &MAI) {
- if (SyntaxVariant == 0)
- return new MSP430InstPrinter(MAI);
- return 0;
-}
-
// Force static initialization.
extern "C" void LLVMInitializeMSP430AsmPrinter() {
RegisterAsmPrinter<MSP430AsmPrinter> X(TheMSP430Target);
- TargetRegistry::RegisterMCInstPrinter(TheMSP430Target,
- createMSP430MCInstPrinter);
}
diff --git a/contrib/llvm/lib/Target/MSP430/MSP430ISelLowering.cpp b/contrib/llvm/lib/Target/MSP430/MSP430ISelLowering.cpp
index 0a3eab1..dc37431 100644
--- a/contrib/llvm/lib/Target/MSP430/MSP430ISelLowering.cpp
+++ b/contrib/llvm/lib/Target/MSP430/MSP430ISelLowering.cpp
@@ -79,6 +79,7 @@ MSP430TargetLowering::MSP430TargetLowering(MSP430TargetMachine &tm) :
setStackPointerRegisterToSaveRestore(MSP430::SPW);
setBooleanContents(ZeroOrOneBooleanContent);
+ setBooleanVectorContents(ZeroOrOneBooleanContent); // FIXME: Is this correct?
setSchedulingPreference(Sched::Latency);
// We have post-incremented loads / stores.
@@ -987,8 +988,8 @@ const char *MSP430TargetLowering::getTargetNodeName(unsigned Opcode) const {
}
}
-bool MSP430TargetLowering::isTruncateFree(const Type *Ty1,
- const Type *Ty2) const {
+bool MSP430TargetLowering::isTruncateFree(Type *Ty1,
+ Type *Ty2) const {
if (!Ty1->isIntegerTy() || !Ty2->isIntegerTy())
return false;
@@ -1002,7 +1003,7 @@ bool MSP430TargetLowering::isTruncateFree(EVT VT1, EVT VT2) const {
return (VT1.getSizeInBits() > VT2.getSizeInBits());
}
-bool MSP430TargetLowering::isZExtFree(const Type *Ty1, const Type *Ty2) const {
+bool MSP430TargetLowering::isZExtFree(Type *Ty1, Type *Ty2) const {
// MSP430 implicitly zero-extends 8-bit results in 16-bit registers.
return 0 && Ty1->isIntegerTy(8) && Ty2->isIntegerTy(16);
}
diff --git a/contrib/llvm/lib/Target/MSP430/MSP430ISelLowering.h b/contrib/llvm/lib/Target/MSP430/MSP430ISelLowering.h
index bd660a0..237f604 100644
--- a/contrib/llvm/lib/Target/MSP430/MSP430ISelLowering.h
+++ b/contrib/llvm/lib/Target/MSP430/MSP430ISelLowering.h
@@ -102,7 +102,7 @@ namespace llvm {
/// isTruncateFree - Return true if it's free to truncate a value of type
/// Ty1 to type Ty2. e.g. On msp430 it's free to truncate a i16 value in
/// register R15W to i8 by referencing its sub-register R15B.
- virtual bool isTruncateFree(const Type *Ty1, const Type *Ty2) const;
+ virtual bool isTruncateFree(Type *Ty1, Type *Ty2) const;
virtual bool isTruncateFree(EVT VT1, EVT VT2) const;
/// isZExtFree - Return true if any actual instruction that defines a value
@@ -113,7 +113,7 @@ namespace llvm {
/// necessarily apply to truncate instructions. e.g. on msp430, all
/// instructions that define 8-bit values implicit zero-extend the result
/// out to 16 bits.
- virtual bool isZExtFree(const Type *Ty1, const Type *Ty2) const;
+ virtual bool isZExtFree(Type *Ty1, Type *Ty2) const;
virtual bool isZExtFree(EVT VT1, EVT VT2) const;
MachineBasicBlock* EmitInstrWithCustomInserter(MachineInstr *MI,
diff --git a/contrib/llvm/lib/Target/MSP430/MSP430InstrInfo.cpp b/contrib/llvm/lib/Target/MSP430/MSP430InstrInfo.cpp
index 846d093..ffd4318 100644
--- a/contrib/llvm/lib/Target/MSP430/MSP430InstrInfo.cpp
+++ b/contrib/llvm/lib/Target/MSP430/MSP430InstrInfo.cpp
@@ -20,8 +20,8 @@
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/PseudoSourceValue.h"
-#include "llvm/Target/TargetRegistry.h"
#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/TargetRegistry.h"
#define GET_INSTRINFO_CTOR
#include "MSP430GenInstrInfo.inc"
diff --git a/contrib/llvm/lib/Target/MSP430/MSP430RegisterInfo.cpp b/contrib/llvm/lib/Target/MSP430/MSP430RegisterInfo.cpp
index 1cc60bb..9049c4b 100644
--- a/contrib/llvm/lib/Target/MSP430/MSP430RegisterInfo.cpp
+++ b/contrib/llvm/lib/Target/MSP430/MSP430RegisterInfo.cpp
@@ -34,7 +34,7 @@ using namespace llvm;
// FIXME: Provide proper call frame setup / destroy opcodes.
MSP430RegisterInfo::MSP430RegisterInfo(MSP430TargetMachine &tm,
const TargetInstrInfo &tii)
- : MSP430GenRegisterInfo(), TM(tm), TII(tii) {
+ : MSP430GenRegisterInfo(MSP430::PCW), TM(tm), TII(tii) {
StackAlign = TM.getFrameLowering()->getStackAlignment();
}
@@ -233,22 +233,8 @@ MSP430RegisterInfo::processFunctionBeforeFrameFinalized(MachineFunction &MF)
}
}
-unsigned MSP430RegisterInfo::getRARegister() const {
- return MSP430::PCW;
-}
-
unsigned MSP430RegisterInfo::getFrameRegister(const MachineFunction &MF) const {
const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering();
return TFI->hasFP(MF) ? MSP430::FPW : MSP430::SPW;
}
-
-int MSP430RegisterInfo::getDwarfRegNum(unsigned RegNum, bool isEH) const {
- llvm_unreachable("Not implemented yet!");
- return 0;
-}
-
-int MSP430RegisterInfo::getLLVMRegNum(unsigned RegNum, bool isEH) const {
- llvm_unreachable("Not implemented yet!");
- return 0;
-}
diff --git a/contrib/llvm/lib/Target/MSP430/MSP430RegisterInfo.h b/contrib/llvm/lib/Target/MSP430/MSP430RegisterInfo.h
index fb70594..10a3d53 100644
--- a/contrib/llvm/lib/Target/MSP430/MSP430RegisterInfo.h
+++ b/contrib/llvm/lib/Target/MSP430/MSP430RegisterInfo.h
@@ -58,12 +58,7 @@ public:
void processFunctionBeforeFrameFinalized(MachineFunction &MF) const;
// Debug information queries.
- unsigned getRARegister() const;
unsigned getFrameRegister(const MachineFunction &MF) const;
-
- //! Get DWARF debugging register number
- int getDwarfRegNum(unsigned RegNum, bool isEH) const;
- int getLLVMRegNum(unsigned RegNum, bool isEH) const;
};
} // end namespace llvm
diff --git a/contrib/llvm/lib/Target/MSP430/MSP430Subtarget.cpp b/contrib/llvm/lib/Target/MSP430/MSP430Subtarget.cpp
index b58c50a..3ee14d9 100644
--- a/contrib/llvm/lib/Target/MSP430/MSP430Subtarget.cpp
+++ b/contrib/llvm/lib/Target/MSP430/MSP430Subtarget.cpp
@@ -13,7 +13,7 @@
#include "MSP430Subtarget.h"
#include "MSP430.h"
-#include "llvm/Target/TargetRegistry.h"
+#include "llvm/Support/TargetRegistry.h"
#define GET_SUBTARGETINFO_TARGET_DESC
#define GET_SUBTARGETINFO_CTOR
diff --git a/contrib/llvm/lib/Target/MSP430/MSP430TargetMachine.cpp b/contrib/llvm/lib/Target/MSP430/MSP430TargetMachine.cpp
index 971f512..4dd8933 100644
--- a/contrib/llvm/lib/Target/MSP430/MSP430TargetMachine.cpp
+++ b/contrib/llvm/lib/Target/MSP430/MSP430TargetMachine.cpp
@@ -16,7 +16,7 @@
#include "llvm/PassManager.h"
#include "llvm/CodeGen/Passes.h"
#include "llvm/MC/MCAsmInfo.h"
-#include "llvm/Target/TargetRegistry.h"
+#include "llvm/Support/TargetRegistry.h"
using namespace llvm;
extern "C" void LLVMInitializeMSP430Target() {
@@ -25,10 +25,11 @@ extern "C" void LLVMInitializeMSP430Target() {
}
MSP430TargetMachine::MSP430TargetMachine(const Target &T,
- const std::string &TT,
- const std::string &CPU,
- const std::string &FS)
- : LLVMTargetMachine(T, TT, CPU, FS),
+ StringRef TT,
+ StringRef CPU,
+ StringRef FS,
+ Reloc::Model RM, CodeModel::Model CM)
+ : LLVMTargetMachine(T, TT, CPU, FS, RM, CM),
Subtarget(TT, CPU, FS),
// FIXME: Check TargetData string.
DataLayout("e-p:16:16:16-i8:8:8-i16:16:16-i32:16:32-n8:16"),
diff --git a/contrib/llvm/lib/Target/MSP430/MSP430TargetMachine.h b/contrib/llvm/lib/Target/MSP430/MSP430TargetMachine.h
index 2a9eea0..eb483dc 100644
--- a/contrib/llvm/lib/Target/MSP430/MSP430TargetMachine.h
+++ b/contrib/llvm/lib/Target/MSP430/MSP430TargetMachine.h
@@ -38,8 +38,9 @@ class MSP430TargetMachine : public LLVMTargetMachine {
MSP430FrameLowering FrameLowering;
public:
- MSP430TargetMachine(const Target &T, const std::string &TT,
- const std::string &CPU, const std::string &FS);
+ MSP430TargetMachine(const Target &T, StringRef TT,
+ StringRef CPU, StringRef FS,
+ Reloc::Model RM, CodeModel::Model CM);
virtual const TargetFrameLowering *getFrameLowering() const {
return &FrameLowering;
diff --git a/contrib/llvm/lib/Target/MSP430/TargetInfo/MSP430TargetInfo.cpp b/contrib/llvm/lib/Target/MSP430/TargetInfo/MSP430TargetInfo.cpp
index f9ca5c4..8b3e01e 100644
--- a/contrib/llvm/lib/Target/MSP430/TargetInfo/MSP430TargetInfo.cpp
+++ b/contrib/llvm/lib/Target/MSP430/TargetInfo/MSP430TargetInfo.cpp
@@ -9,7 +9,7 @@
#include "MSP430.h"
#include "llvm/Module.h"
-#include "llvm/Target/TargetRegistry.h"
+#include "llvm/Support/TargetRegistry.h"
using namespace llvm;
Target llvm::TheMSP430Target;
diff --git a/contrib/llvm/lib/Target/Mangler.cpp b/contrib/llvm/lib/Target/Mangler.cpp
index 46c687b..53ad155f 100644
--- a/contrib/llvm/lib/Target/Mangler.cpp
+++ b/contrib/llvm/lib/Target/Mangler.cpp
@@ -159,7 +159,7 @@ static void AddFastCallStdCallSuffix(SmallVectorImpl<char> &OutName,
unsigned ArgWords = 0;
for (Function::const_arg_iterator AI = F->arg_begin(), AE = F->arg_end();
AI != AE; ++AI) {
- const Type *Ty = AI->getType();
+ Type *Ty = AI->getType();
// 'Dereference' type in case of byval parameter attribute
if (AI->hasByValAttr())
Ty = cast<PointerType>(Ty)->getElementType();
@@ -214,7 +214,7 @@ void Mangler::getNameWithPrefix(SmallVectorImpl<char> &OutName,
// fastcall and stdcall functions usually need @42 at the end to specify
// the argument info.
- const FunctionType *FT = F->getFunctionType();
+ FunctionType *FT = F->getFunctionType();
if ((CC == CallingConv::X86_FastCall || CC == CallingConv::X86_StdCall) &&
// "Pure" variadic functions do not receive @0 suffix.
(!FT->isVarArg() || FT->getNumParams() == 0 ||
diff --git a/contrib/llvm/lib/Target/Mips/InstPrinter/CMakeLists.txt b/contrib/llvm/lib/Target/Mips/InstPrinter/CMakeLists.txt
deleted file mode 100644
index 8852fd4..0000000
--- a/contrib/llvm/lib/Target/Mips/InstPrinter/CMakeLists.txt
+++ /dev/null
@@ -1,6 +0,0 @@
-include_directories( ${CMAKE_CURRENT_BINARY_DIR}/.. ${CMAKE_CURRENT_SOURCE_DIR}/.. )
-
-add_llvm_library(LLVMMipsAsmPrinter
- MipsInstPrinter.cpp
- )
-add_dependencies(LLVMMipsAsmPrinter MipsCodeGenTable_gen)
diff --git a/contrib/llvm/lib/Target/Mips/InstPrinter/Makefile b/contrib/llvm/lib/Target/Mips/InstPrinter/Makefile
deleted file mode 100644
index 63e38ef..0000000
--- a/contrib/llvm/lib/Target/Mips/InstPrinter/Makefile
+++ /dev/null
@@ -1,16 +0,0 @@
-##===- lib/Target/Mips/AsmPrinter/Makefile --------------*- Makefile -*-===##
-#
-# The LLVM Compiler Infrastructure
-#
-# This file is distributed under the University of Illinois Open Source
-# License. See LICENSE.TXT for details.
-#
-##===----------------------------------------------------------------------===##
-
-LEVEL = ../../../..
-LIBRARYNAME = LLVMMipsAsmPrinter
-
-# Hack: we need to include 'main' arm target directory to grab private headers
-CPP.Flags += -I$(PROJ_OBJ_DIR)/.. -I$(PROJ_SRC_DIR)/..
-
-include $(LEVEL)/Makefile.common
diff --git a/contrib/llvm/lib/Target/Mips/InstPrinter/MipsInstPrinter.cpp b/contrib/llvm/lib/Target/Mips/InstPrinter/MipsInstPrinter.cpp
index 41c1dd3..3dafc61 100644
--- a/contrib/llvm/lib/Target/Mips/InstPrinter/MipsInstPrinter.cpp
+++ b/contrib/llvm/lib/Target/Mips/InstPrinter/MipsInstPrinter.cpp
@@ -1,4 +1,4 @@
-//===-- MipsInstPrinter.cpp - Convert Mips MCInst to assembly syntax --------===//
+//===-- MipsInstPrinter.cpp - Convert Mips MCInst to assembly syntax ------===//
//
// The LLVM Compiler Infrastructure
//
@@ -69,8 +69,10 @@ void MipsInstPrinter::printRegName(raw_ostream &OS, unsigned RegNo) const {
OS << '$' << LowercaseString(getRegisterName(RegNo));
}
-void MipsInstPrinter::printInst(const MCInst *MI, raw_ostream &O) {
+void MipsInstPrinter::printInst(const MCInst *MI, raw_ostream &O,
+ StringRef Annot) {
printInstruction(MI, O);
+ printAnnotation(O, Annot);
}
void MipsInstPrinter::printOperand(const MCInst *MI, unsigned OpNo,
diff --git a/contrib/llvm/lib/Target/Mips/InstPrinter/MipsInstPrinter.h b/contrib/llvm/lib/Target/Mips/InstPrinter/MipsInstPrinter.h
index 680208e..5c11165 100644
--- a/contrib/llvm/lib/Target/Mips/InstPrinter/MipsInstPrinter.h
+++ b/contrib/llvm/lib/Target/Mips/InstPrinter/MipsInstPrinter.h
@@ -1,4 +1,4 @@
-//===-- MipsInstPrinter.h - Convert Mips MCInst to assembly syntax ----------===//
+//===-- MipsInstPrinter.h - Convert Mips MCInst to assembly syntax --------===//
//
// The LLVM Compiler Infrastructure
//
@@ -86,7 +86,7 @@ public:
virtual StringRef getOpcodeName(unsigned Opcode) const;
virtual void printRegName(raw_ostream &OS, unsigned RegNo) const;
- virtual void printInst(const MCInst *MI, raw_ostream &O);
+ virtual void printInst(const MCInst *MI, raw_ostream &O, StringRef Annot);
private:
void printOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O);
diff --git a/contrib/llvm/lib/Target/Mips/MCTargetDesc/CMakeLists.txt b/contrib/llvm/lib/Target/Mips/MCTargetDesc/CMakeLists.txt
deleted file mode 100644
index 97de75d..0000000
--- a/contrib/llvm/lib/Target/Mips/MCTargetDesc/CMakeLists.txt
+++ /dev/null
@@ -1,4 +0,0 @@
-add_llvm_library(LLVMMipsDesc
- MipsMCTargetDesc.cpp
- MipsMCAsmInfo.cpp
- )
diff --git a/contrib/llvm/lib/Target/Mips/MCTargetDesc/Makefile b/contrib/llvm/lib/Target/Mips/MCTargetDesc/Makefile
deleted file mode 100644
index 7fe2086..0000000
--- a/contrib/llvm/lib/Target/Mips/MCTargetDesc/Makefile
+++ /dev/null
@@ -1,16 +0,0 @@
-##===- lib/Target/Mips/TargetDesc/Makefile -----------------*- Makefile -*-===##
-#
-# The LLVM Compiler Infrastructure
-#
-# This file is distributed under the University of Illinois Open Source
-# License. See LICENSE.TXT for details.
-#
-##===----------------------------------------------------------------------===##
-
-LEVEL = ../../../..
-LIBRARYNAME = LLVMMipsDesc
-
-# Hack: we need to include 'main' target directory to grab private headers
-CPP.Flags += -I$(PROJ_OBJ_DIR)/.. -I$(PROJ_SRC_DIR)/..
-
-include $(LEVEL)/Makefile.common
diff --git a/contrib/llvm/lib/Target/Mips/MCTargetDesc/MipsAsmBackend.cpp b/contrib/llvm/lib/Target/Mips/MCTargetDesc/MipsAsmBackend.cpp
new file mode 100644
index 0000000..f190ec4
--- /dev/null
+++ b/contrib/llvm/lib/Target/Mips/MCTargetDesc/MipsAsmBackend.cpp
@@ -0,0 +1,117 @@
+#include "MCTargetDesc/MipsMCTargetDesc.h"
+#include "llvm/ADT/Twine.h"
+#include "llvm/MC/MCAssembler.h"
+#include "llvm/MC/MCDirectives.h"
+#include "llvm/MC/MCELFObjectWriter.h"
+#include "llvm/MC/MCExpr.h"
+#include "llvm/MC/MCMachObjectWriter.h"
+#include "llvm/MC/MCObjectWriter.h"
+#include "llvm/MC/MCSectionELF.h"
+#include "llvm/MC/MCSectionMachO.h"
+#include "llvm/MC/MCAsmBackend.h"
+#include "llvm/MC/MCSubtargetInfo.h"
+#include "llvm/Object/MachOFormat.h"
+#include "llvm/Support/ELF.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+using namespace llvm;
+
+namespace {
+class MipsELFObjectWriter : public MCELFObjectTargetWriter {
+public:
+ MipsELFObjectWriter(bool is64Bit, Triple::OSType OSType, uint16_t EMachine,
+ bool HasRelocationAddend)
+ : MCELFObjectTargetWriter(is64Bit, OSType, EMachine,
+ HasRelocationAddend) {}
+};
+
+class MipsAsmBackend : public MCAsmBackend {
+public:
+ MipsAsmBackend(const Target &T)
+ : MCAsmBackend() {}
+
+ unsigned getNumFixupKinds() const {
+ return 1; //tbd
+ }
+
+ /// ApplyFixup - Apply the \arg Value for given \arg Fixup into the provided
+ /// data fragment, at the offset specified by the fixup and following the
+ /// fixup kind as appropriate.
+ void ApplyFixup(const MCFixup &Fixup, char *Data, unsigned DataSize,
+ uint64_t Value) const {
+ }
+
+ /// @name Target Relaxation Interfaces
+ /// @{
+
+ /// MayNeedRelaxation - Check whether the given instruction may need
+ /// relaxation.
+ ///
+ /// \param Inst - The instruction to test.
+ bool MayNeedRelaxation(const MCInst &Inst) const {
+ return false;
+ }
+
+ /// RelaxInstruction - Relax the instruction in the given fragment to the next
+ /// wider instruction.
+ ///
+ /// \param Inst - The instruction to relax, which may be the same as the
+ /// output.
+ /// \parm Res [output] - On return, the relaxed instruction.
+ void RelaxInstruction(const MCInst &Inst, MCInst &Res) const {
+ }
+
+ /// @}
+
+ /// WriteNopData - Write an (optimal) nop sequence of Count bytes to the given
+ /// output. If the target cannot generate such a sequence, it should return an
+ /// error.
+ ///
+ /// \return - True on success.
+ bool WriteNopData(uint64_t Count, MCObjectWriter *OW) const {
+ return false;
+ }
+};
+
+class MipsEB_AsmBackend : public MipsAsmBackend {
+public:
+ Triple::OSType OSType;
+
+ MipsEB_AsmBackend(const Target &T, Triple::OSType _OSType)
+ : MipsAsmBackend(T), OSType(_OSType) {}
+
+ MCObjectWriter *createObjectWriter(raw_ostream &OS) const {
+ return createELFObjectWriter(createELFObjectTargetWriter(),
+ OS, /*IsLittleEndian*/ false);
+ }
+
+ MCELFObjectTargetWriter *createELFObjectTargetWriter() const {
+ return new MipsELFObjectWriter(false, OSType, ELF::EM_MIPS, false);
+ }
+};
+
+class MipsEL_AsmBackend : public MipsAsmBackend {
+public:
+ Triple::OSType OSType;
+
+ MipsEL_AsmBackend(const Target &T, Triple::OSType _OSType)
+ : MipsAsmBackend(T), OSType(_OSType) {}
+
+ MCObjectWriter *createObjectWriter(raw_ostream &OS) const {
+ return createELFObjectWriter(createELFObjectTargetWriter(),
+ OS, /*IsLittleEndian*/ true);
+ }
+
+ MCELFObjectTargetWriter *createELFObjectTargetWriter() const {
+ return new MipsELFObjectWriter(false, OSType, ELF::EM_MIPS, false);
+ }
+};
+}
+
+MCAsmBackend *llvm::createMipsAsmBackend(const Target &T, StringRef TT) {
+ Triple TheTriple(TT);
+
+ // just return little endian for now
+ //
+ return new MipsEL_AsmBackend(T, Triple(TT).getOS());
+}
diff --git a/contrib/llvm/lib/Target/Mips/MCTargetDesc/MipsBaseInfo.h b/contrib/llvm/lib/Target/Mips/MCTargetDesc/MipsBaseInfo.h
new file mode 100644
index 0000000..f7a6fa9
--- /dev/null
+++ b/contrib/llvm/lib/Target/Mips/MCTargetDesc/MipsBaseInfo.h
@@ -0,0 +1,113 @@
+//===-- MipsBaseInfo.h - Top level definitions for ARM ------- --*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains small standalone helper functions and enum definitions for
+// the Mips target useful for the compiler back-end and the MC libraries.
+//
+//===----------------------------------------------------------------------===//
+#ifndef MIPSBASEINFO_H
+#define MIPSBASEINFO_H
+
+#include "MipsMCTargetDesc.h"
+#include "llvm/Support/DataTypes.h"
+#include "llvm/Support/ErrorHandling.h"
+
+namespace llvm {
+/// getMipsRegisterNumbering - Given the enum value for some register,
+/// return the number that it corresponds to.
+inline static unsigned getMipsRegisterNumbering(unsigned RegEnum)
+{
+ switch (RegEnum) {
+ case Mips::ZERO: case Mips::ZERO_64: case Mips::F0: case Mips::D0_64:
+ case Mips::D0:
+ return 0;
+ case Mips::AT: case Mips::AT_64: case Mips::F1: case Mips::D1_64:
+ return 1;
+ case Mips::V0: case Mips::V0_64: case Mips::F2: case Mips::D2_64:
+ case Mips::D1:
+ return 2;
+ case Mips::V1: case Mips::V1_64: case Mips::F3: case Mips::D3_64:
+ return 3;
+ case Mips::A0: case Mips::A0_64: case Mips::F4: case Mips::D4_64:
+ case Mips::D2:
+ return 4;
+ case Mips::A1: case Mips::A1_64: case Mips::F5: case Mips::D5_64:
+ return 5;
+ case Mips::A2: case Mips::A2_64: case Mips::F6: case Mips::D6_64:
+ case Mips::D3:
+ return 6;
+ case Mips::A3: case Mips::A3_64: case Mips::F7: case Mips::D7_64:
+ return 7;
+ case Mips::T0: case Mips::T0_64: case Mips::F8: case Mips::D8_64:
+ case Mips::D4:
+ return 8;
+ case Mips::T1: case Mips::T1_64: case Mips::F9: case Mips::D9_64:
+ return 9;
+ case Mips::T2: case Mips::T2_64: case Mips::F10: case Mips::D10_64:
+ case Mips::D5:
+ return 10;
+ case Mips::T3: case Mips::T3_64: case Mips::F11: case Mips::D11_64:
+ return 11;
+ case Mips::T4: case Mips::T4_64: case Mips::F12: case Mips::D12_64:
+ case Mips::D6:
+ return 12;
+ case Mips::T5: case Mips::T5_64: case Mips::F13: case Mips::D13_64:
+ return 13;
+ case Mips::T6: case Mips::T6_64: case Mips::F14: case Mips::D14_64:
+ case Mips::D7:
+ return 14;
+ case Mips::T7: case Mips::T7_64: case Mips::F15: case Mips::D15_64:
+ return 15;
+ case Mips::S0: case Mips::S0_64: case Mips::F16: case Mips::D16_64:
+ case Mips::D8:
+ return 16;
+ case Mips::S1: case Mips::S1_64: case Mips::F17: case Mips::D17_64:
+ return 17;
+ case Mips::S2: case Mips::S2_64: case Mips::F18: case Mips::D18_64:
+ case Mips::D9:
+ return 18;
+ case Mips::S3: case Mips::S3_64: case Mips::F19: case Mips::D19_64:
+ return 19;
+ case Mips::S4: case Mips::S4_64: case Mips::F20: case Mips::D20_64:
+ case Mips::D10:
+ return 20;
+ case Mips::S5: case Mips::S5_64: case Mips::F21: case Mips::D21_64:
+ return 21;
+ case Mips::S6: case Mips::S6_64: case Mips::F22: case Mips::D22_64:
+ case Mips::D11:
+ return 22;
+ case Mips::S7: case Mips::S7_64: case Mips::F23: case Mips::D23_64:
+ return 23;
+ case Mips::T8: case Mips::T8_64: case Mips::F24: case Mips::D24_64:
+ case Mips::D12:
+ return 24;
+ case Mips::T9: case Mips::T9_64: case Mips::F25: case Mips::D25_64:
+ return 25;
+ case Mips::K0: case Mips::K0_64: case Mips::F26: case Mips::D26_64:
+ case Mips::D13:
+ return 26;
+ case Mips::K1: case Mips::K1_64: case Mips::F27: case Mips::D27_64:
+ return 27;
+ case Mips::GP: case Mips::GP_64: case Mips::F28: case Mips::D28_64:
+ case Mips::D14:
+ return 28;
+ case Mips::SP: case Mips::SP_64: case Mips::F29: case Mips::D29_64:
+ return 29;
+ case Mips::FP: case Mips::FP_64: case Mips::F30: case Mips::D30_64:
+ case Mips::D15:
+ return 30;
+ case Mips::RA: case Mips::RA_64: case Mips::F31: case Mips::D31_64:
+ return 31;
+ default: llvm_unreachable("Unknown register number!");
+ }
+ return 0; // Not reached
+}
+}
+
+#endif
diff --git a/contrib/llvm/lib/Target/Mips/MCTargetDesc/MipsFixupKinds.h b/contrib/llvm/lib/Target/Mips/MCTargetDesc/MipsFixupKinds.h
new file mode 100644
index 0000000..8b099ea
--- /dev/null
+++ b/contrib/llvm/lib/Target/Mips/MCTargetDesc/MipsFixupKinds.h
@@ -0,0 +1,90 @@
+#ifndef LLVM_Mips_MipsFIXUPKINDS_H
+#define LLVM_Mips_MipsFIXUPKINDS_H
+
+//===-- Mips/MipsFixupKinds.h - Mips Specific Fixup Entries --------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+
+#include "llvm/MC/MCFixup.h"
+
+namespace llvm {
+namespace Mips {
+ enum Fixups {
+ // fixup_Mips_xxx - R_MIPS_NONE
+ fixup_Mips_NONE = FirstTargetFixupKind,
+
+ // fixup_Mips_xxx - R_MIPS_16.
+ fixup_Mips_16,
+
+ // fixup_Mips_xxx - R_MIPS_32.
+ fixup_Mips_32,
+
+ // fixup_Mips_xxx - R_MIPS_REL32.
+ fixup_Mips_REL32,
+
+ // fixup_Mips_xxx - R_MIPS_26.
+ fixup_Mips_26,
+
+ // fixup_Mips_xxx - R_MIPS_HI16.
+ fixup_Mips_HI16,
+
+ // fixup_Mips_xxx - R_MIPS_LO16.
+ fixup_Mips_LO16,
+
+ // fixup_Mips_xxx - R_MIPS_GPREL16.
+ fixup_Mips_GPREL16,
+
+ // fixup_Mips_xxx - R_MIPS_LITERAL.
+ fixup_Mips_LITERAL,
+
+ // fixup_Mips_xxx - R_MIPS_GOT16.
+ fixup_Mips_GOT16,
+
+ // fixup_Mips_xxx - R_MIPS_PC16.
+ fixup_Mips_PC16,
+
+ // fixup_Mips_xxx - R_MIPS_CALL16.
+ fixup_Mips_CALL16,
+
+ // fixup_Mips_xxx - R_MIPS_GPREL32.
+ fixup_Mips_GPREL32,
+
+ // fixup_Mips_xxx - R_MIPS_SHIFT5.
+ fixup_Mips_SHIFT5,
+
+ // fixup_Mips_xxx - R_MIPS_SHIFT6.
+ fixup_Mips_SHIFT6,
+
+ // fixup_Mips_xxx - R_MIPS_64.
+ fixup_Mips_64,
+
+ // fixup_Mips_xxx - R_MIPS_TLS_GD.
+ fixup_Mips_TLSGD,
+
+ // fixup_Mips_xxx - R_MIPS_TLS_GOTTPREL.
+ fixup_Mips_GOTTPREL,
+
+ // fixup_Mips_xxx - R_MIPS_TLS_TPREL_HI16.
+ fixup_Mips_TPREL_HI,
+
+ // fixup_Mips_xxx - R_MIPS_TLS_TPREL_LO16.
+ fixup_Mips_TPREL_LO,
+
+ // fixup_Mips_xxx - yyy. // This should become R_MIPS_PC16
+ fixup_Mips_Branch_PCRel,
+
+ // Marker
+ LastTargetFixupKind,
+ NumTargetFixupKinds = LastTargetFixupKind - FirstTargetFixupKind
+ };
+} // namespace llvm
+} // namespace Mips
+
+
+#endif /* LLVM_Mips_MipsFIXUPKINDS_H */
diff --git a/contrib/llvm/lib/Target/Mips/MCTargetDesc/MipsMCAsmInfo.cpp b/contrib/llvm/lib/Target/Mips/MCTargetDesc/MipsMCAsmInfo.cpp
index 5d92425..71ae804 100644
--- a/contrib/llvm/lib/Target/Mips/MCTargetDesc/MipsMCAsmInfo.cpp
+++ b/contrib/llvm/lib/Target/Mips/MCTargetDesc/MipsMCAsmInfo.cpp
@@ -18,7 +18,8 @@ using namespace llvm;
MipsMCAsmInfo::MipsMCAsmInfo(const Target &T, StringRef TT) {
Triple TheTriple(TT);
- if (TheTriple.getArch() == Triple::mips)
+ if ((TheTriple.getArch() == Triple::mips) ||
+ (TheTriple.getArch() == Triple::mips64))
IsLittleEndian = false;
AlignmentIsInBytes = false;
diff --git a/contrib/llvm/lib/Target/Mips/MCTargetDesc/MipsMCCodeEmitter.cpp b/contrib/llvm/lib/Target/Mips/MCTargetDesc/MipsMCCodeEmitter.cpp
new file mode 100644
index 0000000..d66de23
--- /dev/null
+++ b/contrib/llvm/lib/Target/Mips/MCTargetDesc/MipsMCCodeEmitter.cpp
@@ -0,0 +1,52 @@
+//===-- MipsMCCodeEmitter.cpp - Convert Mips code to machine code ---------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the MipsMCCodeEmitter class.
+//
+//===----------------------------------------------------------------------===//
+//
+#define DEBUG_TYPE "mccodeemitter"
+#include "llvm/MC/MCCodeEmitter.h"
+#include "llvm/MC/MCExpr.h"
+#include "llvm/MC/MCInst.h"
+#include "llvm/MC/MCInstrInfo.h"
+#include "llvm/MC/MCRegisterInfo.h"
+#include "llvm/MC/MCSubtargetInfo.h"
+#include "llvm/ADT/APFloat.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Support/raw_ostream.h"
+#include "MCTargetDesc/MipsMCTargetDesc.h"
+
+using namespace llvm;
+
+namespace {
+class MipsMCCodeEmitter : public MCCodeEmitter {
+ MipsMCCodeEmitter(const MipsMCCodeEmitter &); // DO NOT IMPLEMENT
+ void operator=(const MipsMCCodeEmitter &); // DO NOT IMPLEMENT
+ const MCInstrInfo &MCII;
+ const MCSubtargetInfo &STI;
+
+public:
+ MipsMCCodeEmitter(const MCInstrInfo &mcii, const MCSubtargetInfo &sti,
+ MCContext &ctx)
+ : MCII(mcii), STI(sti) {}
+
+ ~MipsMCCodeEmitter() {}
+
+ void EncodeInstruction(const MCInst &MI, raw_ostream &OS,
+ SmallVectorImpl<MCFixup> &Fixups) const {
+ }
+}; // class MipsMCCodeEmitter
+} // namespace
+
+MCCodeEmitter *llvm::createMipsMCCodeEmitter(const MCInstrInfo &MCII,
+ const MCSubtargetInfo &STI,
+ MCContext &Ctx) {
+ return new MipsMCCodeEmitter(MCII, STI, Ctx);
+}
diff --git a/contrib/llvm/lib/Target/Mips/MCTargetDesc/MipsMCTargetDesc.cpp b/contrib/llvm/lib/Target/Mips/MCTargetDesc/MipsMCTargetDesc.cpp
index 06f0d0b..1f9e3dd 100644
--- a/contrib/llvm/lib/Target/Mips/MCTargetDesc/MipsMCTargetDesc.cpp
+++ b/contrib/llvm/lib/Target/Mips/MCTargetDesc/MipsMCTargetDesc.cpp
@@ -13,10 +13,14 @@
#include "MipsMCTargetDesc.h"
#include "MipsMCAsmInfo.h"
+#include "InstPrinter/MipsInstPrinter.h"
+#include "llvm/MC/MachineLocation.h"
+#include "llvm/MC/MCCodeGenInfo.h"
#include "llvm/MC/MCInstrInfo.h"
#include "llvm/MC/MCRegisterInfo.h"
+#include "llvm/MC/MCStreamer.h"
#include "llvm/MC/MCSubtargetInfo.h"
-#include "llvm/Target/TargetRegistry.h"
+#include "llvm/Support/TargetRegistry.h"
#define GET_INSTRINFO_MC_DESC
#include "MipsGenInstrInfo.inc"
@@ -35,11 +39,12 @@ static MCInstrInfo *createMipsMCInstrInfo() {
return X;
}
-extern "C" void LLVMInitializeMipsMCInstrInfo() {
- TargetRegistry::RegisterMCInstrInfo(TheMipsTarget, createMipsMCInstrInfo);
+static MCRegisterInfo *createMipsMCRegisterInfo(StringRef TT) {
+ MCRegisterInfo *X = new MCRegisterInfo();
+ InitMipsMCRegisterInfo(X, Mips::RA);
+ return X;
}
-
static MCSubtargetInfo *createMipsMCSubtargetInfo(StringRef TT, StringRef CPU,
StringRef FS) {
MCSubtargetInfo *X = new MCSubtargetInfo();
@@ -47,12 +52,111 @@ static MCSubtargetInfo *createMipsMCSubtargetInfo(StringRef TT, StringRef CPU,
return X;
}
-extern "C" void LLVMInitializeMipsMCSubtargetInfo() {
+static MCAsmInfo *createMipsMCAsmInfo(const Target &T, StringRef TT) {
+ MCAsmInfo *MAI = new MipsMCAsmInfo(T, TT);
+
+ MachineLocation Dst(MachineLocation::VirtualFP);
+ MachineLocation Src(Mips::SP, 0);
+ MAI->addInitialFrameState(0, Dst, Src);
+
+ return MAI;
+}
+
+static MCCodeGenInfo *createMipsMCCodeGenInfo(StringRef TT, Reloc::Model RM,
+ CodeModel::Model CM) {
+ MCCodeGenInfo *X = new MCCodeGenInfo();
+ if (RM == Reloc::Default)
+ RM = Reloc::PIC_;
+ X->InitMCCodeGenInfo(RM, CM);
+ return X;
+}
+
+static MCInstPrinter *createMipsMCInstPrinter(const Target &T,
+ unsigned SyntaxVariant,
+ const MCAsmInfo &MAI,
+ const MCSubtargetInfo &STI) {
+ return new MipsInstPrinter(MAI);
+}
+
+static MCStreamer *createMCStreamer(const Target &T, StringRef TT,
+ MCContext &Ctx, MCAsmBackend &MAB,
+ raw_ostream &_OS,
+ MCCodeEmitter *_Emitter,
+ bool RelaxAll,
+ bool NoExecStack) {
+ Triple TheTriple(TT);
+
+ return createELFStreamer(Ctx, MAB, _OS, _Emitter, RelaxAll, NoExecStack);
+}
+
+extern "C" void LLVMInitializeMipsTargetMC() {
+ // Register the MC asm info.
+ RegisterMCAsmInfoFn X(TheMipsTarget, createMipsMCAsmInfo);
+ RegisterMCAsmInfoFn Y(TheMipselTarget, createMipsMCAsmInfo);
+ RegisterMCAsmInfoFn A(TheMips64Target, createMipsMCAsmInfo);
+ RegisterMCAsmInfoFn B(TheMips64elTarget, createMipsMCAsmInfo);
+
+ // Register the MC codegen info.
+ TargetRegistry::RegisterMCCodeGenInfo(TheMipsTarget,
+ createMipsMCCodeGenInfo);
+ TargetRegistry::RegisterMCCodeGenInfo(TheMipselTarget,
+ createMipsMCCodeGenInfo);
+ TargetRegistry::RegisterMCCodeGenInfo(TheMips64Target,
+ createMipsMCCodeGenInfo);
+ TargetRegistry::RegisterMCCodeGenInfo(TheMips64elTarget,
+ createMipsMCCodeGenInfo);
+
+ // Register the MC instruction info.
+ TargetRegistry::RegisterMCInstrInfo(TheMipsTarget, createMipsMCInstrInfo);
+ TargetRegistry::RegisterMCInstrInfo(TheMipselTarget, createMipsMCInstrInfo);
+ TargetRegistry::RegisterMCInstrInfo(TheMips64Target, createMipsMCInstrInfo);
+ TargetRegistry::RegisterMCInstrInfo(TheMips64elTarget, createMipsMCInstrInfo);
+
+ // Register the MC register info.
+ TargetRegistry::RegisterMCRegInfo(TheMipsTarget, createMipsMCRegisterInfo);
+ TargetRegistry::RegisterMCRegInfo(TheMipselTarget, createMipsMCRegisterInfo);
+ TargetRegistry::RegisterMCRegInfo(TheMips64Target, createMipsMCRegisterInfo);
+ TargetRegistry::RegisterMCRegInfo(TheMips64elTarget,
+ createMipsMCRegisterInfo);
+
+ // Register the MC Code Emitter
+ TargetRegistry::RegisterMCCodeEmitter(TheMipsTarget, createMipsMCCodeEmitter);
+ TargetRegistry::RegisterMCCodeEmitter(TheMipselTarget,
+ createMipsMCCodeEmitter);
+ TargetRegistry::RegisterMCCodeEmitter(TheMips64Target,
+ createMipsMCCodeEmitter);
+ TargetRegistry::RegisterMCCodeEmitter(TheMips64elTarget,
+ createMipsMCCodeEmitter);
+
+ // Register the object streamer.
+ TargetRegistry::RegisterMCObjectStreamer(TheMipsTarget, createMCStreamer);
+ TargetRegistry::RegisterMCObjectStreamer(TheMipselTarget, createMCStreamer);
+ TargetRegistry::RegisterMCObjectStreamer(TheMips64Target, createMCStreamer);
+ TargetRegistry::RegisterMCObjectStreamer(TheMips64elTarget, createMCStreamer);
+
+ // Register the asm backend.
+ TargetRegistry::RegisterMCAsmBackend(TheMipsTarget, createMipsAsmBackend);
+ TargetRegistry::RegisterMCAsmBackend(TheMipselTarget, createMipsAsmBackend);
+ TargetRegistry::RegisterMCAsmBackend(TheMips64Target, createMipsAsmBackend);
+ TargetRegistry::RegisterMCAsmBackend(TheMips64elTarget, createMipsAsmBackend);
+
+ // Register the MC subtarget info.
TargetRegistry::RegisterMCSubtargetInfo(TheMipsTarget,
createMipsMCSubtargetInfo);
-}
+ TargetRegistry::RegisterMCSubtargetInfo(TheMipselTarget,
+ createMipsMCSubtargetInfo);
+ TargetRegistry::RegisterMCSubtargetInfo(TheMips64Target,
+ createMipsMCSubtargetInfo);
+ TargetRegistry::RegisterMCSubtargetInfo(TheMips64elTarget,
+ createMipsMCSubtargetInfo);
-extern "C" void LLVMInitializeMipsMCAsmInfo() {
- RegisterMCAsmInfo<MipsMCAsmInfo> X(TheMipsTarget);
- RegisterMCAsmInfo<MipsMCAsmInfo> Y(TheMipselTarget);
+ // Register the MCInstPrinter.
+ TargetRegistry::RegisterMCInstPrinter(TheMipsTarget,
+ createMipsMCInstPrinter);
+ TargetRegistry::RegisterMCInstPrinter(TheMipselTarget,
+ createMipsMCInstPrinter);
+ TargetRegistry::RegisterMCInstPrinter(TheMips64Target,
+ createMipsMCInstPrinter);
+ TargetRegistry::RegisterMCInstPrinter(TheMips64elTarget,
+ createMipsMCInstPrinter);
}
diff --git a/contrib/llvm/lib/Target/Mips/MCTargetDesc/MipsMCTargetDesc.h b/contrib/llvm/lib/Target/Mips/MCTargetDesc/MipsMCTargetDesc.h
index 3d18f11..7a0042a 100644
--- a/contrib/llvm/lib/Target/Mips/MCTargetDesc/MipsMCTargetDesc.h
+++ b/contrib/llvm/lib/Target/Mips/MCTargetDesc/MipsMCTargetDesc.h
@@ -1,4 +1,4 @@
-//===-- AlphaMCTargetDesc.h - Alpha Target Descriptions ---------*- C++ -*-===//
+//===-- MipsMCTargetDesc.h - Mips Target Descriptions -----------*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
@@ -7,21 +7,32 @@
//
//===----------------------------------------------------------------------===//
//
-// This file provides Alpha specific target descriptions.
+// This file provides Mips specific target descriptions.
//
//===----------------------------------------------------------------------===//
-#ifndef ALPHAMCTARGETDESC_H
-#define ALPHAMCTARGETDESC_H
+#ifndef MIPSMCTARGETDESC_H
+#define MIPSMCTARGETDESC_H
namespace llvm {
+class MCAsmBackend;
+class MCInstrInfo;
+class MCCodeEmitter;
+class MCContext;
class MCSubtargetInfo;
-class Target;
class StringRef;
+class Target;
extern Target TheMipsTarget;
extern Target TheMipselTarget;
+extern Target TheMips64Target;
+extern Target TheMips64elTarget;
+
+MCCodeEmitter *createMipsMCCodeEmitter(const MCInstrInfo &MCII,
+ const MCSubtargetInfo &STI,
+ MCContext &Ctx);
+MCAsmBackend *createMipsAsmBackend(const Target &T, StringRef TT);
} // End llvm namespace
// Defines symbolic names for Mips registers. This defines a mapping from
diff --git a/contrib/llvm/lib/Target/Mips/Mips.h b/contrib/llvm/lib/Target/Mips/Mips.h
index 984b5ad..bacecf2 100644
--- a/contrib/llvm/lib/Target/Mips/Mips.h
+++ b/contrib/llvm/lib/Target/Mips/Mips.h
@@ -29,6 +29,9 @@ namespace llvm {
FunctionPass *createMipsExpandPseudoPass(MipsTargetMachine &TM);
FunctionPass *createMipsEmitGPRestorePass(MipsTargetMachine &TM);
+ FunctionPass *createMipsJITCodeEmitterPass(MipsTargetMachine &TM,
+ JITCodeEmitter &JCE);
+
} // end namespace llvm;
#endif
diff --git a/contrib/llvm/lib/Target/Mips/Mips.td b/contrib/llvm/lib/Target/Mips/Mips.td
index 433cd57..39c2c16 100644
--- a/contrib/llvm/lib/Target/Mips/Mips.td
+++ b/contrib/llvm/lib/Target/Mips/Mips.td
@@ -38,6 +38,10 @@ def FeatureSingleFloat : SubtargetFeature<"single-float", "IsSingleFloat",
"true", "Only supports single precision float">;
def FeatureO32 : SubtargetFeature<"o32", "MipsABI", "O32",
"Enable o32 ABI">;
+def FeatureN32 : SubtargetFeature<"n32", "MipsABI", "N32",
+ "Enable n32 ABI">;
+def FeatureN64 : SubtargetFeature<"n64", "MipsABI", "N64",
+ "Enable n64 ABI">;
def FeatureEABI : SubtargetFeature<"eabi", "MipsABI", "EABI",
"Enable eabi ABI">;
def FeatureVFPU : SubtargetFeature<"vfpu", "HasVFPU",
@@ -54,16 +58,19 @@ def FeatureSwap : SubtargetFeature<"swap", "HasSwap", "true",
"Enable 'byte/half swap' instructions.">;
def FeatureBitCount : SubtargetFeature<"bitcount", "HasBitCount", "true",
"Enable 'count leading bits' instructions.">;
-def FeatureMips1 : SubtargetFeature<"mips1", "MipsArchVersion", "Mips1",
- "Mips1 ISA Support">;
-def FeatureMips2 : SubtargetFeature<"mips2", "MipsArchVersion", "Mips2",
- "Mips2 ISA Support">;
def FeatureMips32 : SubtargetFeature<"mips32", "MipsArchVersion", "Mips32",
"Mips32 ISA Support",
[FeatureCondMov, FeatureBitCount]>;
def FeatureMips32r2 : SubtargetFeature<"mips32r2", "MipsArchVersion",
"Mips32r2", "Mips32r2 ISA Support",
[FeatureMips32, FeatureSEInReg]>;
+def FeatureMips64 : SubtargetFeature<"mips64", "MipsArchVersion",
+ "Mips64", "Mips64 ISA Support",
+ [FeatureGP64Bit, FeatureFP64Bit,
+ FeatureMips32]>;
+def FeatureMips64r2 : SubtargetFeature<"mips64r2", "MipsArchVersion",
+ "Mips64r2", "Mips64r2 ISA Support",
+ [FeatureMips64, FeatureMips32r2]>;
//===----------------------------------------------------------------------===//
// Mips processors supported.
@@ -72,21 +79,10 @@ def FeatureMips32r2 : SubtargetFeature<"mips32r2", "MipsArchVersion",
class Proc<string Name, list<SubtargetFeature> Features>
: Processor<Name, MipsGenericItineraries, Features>;
-def : Proc<"mips1", [FeatureMips1]>;
-def : Proc<"r2000", [FeatureMips1]>;
-def : Proc<"r3000", [FeatureMips1]>;
-
-def : Proc<"mips2", [FeatureMips2]>;
-def : Proc<"r6000", [FeatureMips2]>;
-
+def : Proc<"mips32r1", [FeatureMips32]>;
def : Proc<"4ke", [FeatureMips32r2]>;
-
-// Allegrex is a 32bit subset of r4000, both for integer and fp registers,
-// but much more similar to Mips2 than Mips3. It also contains some of
-// Mips32/Mips32r2 instructions and a custom vector fpu processor.
-def : Proc<"allegrex", [FeatureMips2, FeatureSingleFloat, FeatureEABI,
- FeatureVFPU, FeatureSEInReg, FeatureCondMov, FeatureMulDivAdd,
- FeatureMinMax, FeatureSwap, FeatureBitCount]>;
+def : Proc<"mips64r1", [FeatureMips64]>;
+def : Proc<"mips64r2", [FeatureMips64r2]>;
def MipsAsmWriter : AsmWriter {
string AsmWriterClassName = "InstPrinter";
diff --git a/contrib/llvm/lib/Target/Mips/Mips64InstrInfo.td b/contrib/llvm/lib/Target/Mips/Mips64InstrInfo.td
new file mode 100644
index 0000000..49b0223
--- /dev/null
+++ b/contrib/llvm/lib/Target/Mips/Mips64InstrInfo.td
@@ -0,0 +1,214 @@
+//===- Mips64InstrInfo.td - Mips64 Instruction Information -*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file describes Mips64 instructions.
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// Mips Operand, Complex Patterns and Transformations Definitions.
+//===----------------------------------------------------------------------===//
+
+// Instruction operand types
+def shamt_64 : Operand<i64>;
+
+// Unsigned Operand
+def uimm16_64 : Operand<i64> {
+ let PrintMethod = "printUnsignedImm";
+}
+
+// Transformation Function - get Imm - 32.
+def Subtract32 : SDNodeXForm<imm, [{
+ return getI32Imm((unsigned)N->getZExtValue() - 32);
+}]>;
+
+// imm32_63 predicate - True if imm is in range [32, 63].
+def imm32_63 : ImmLeaf<i64,
+ [{return (int32_t)Imm >= 32 && (int32_t)Imm < 64;}],
+ Subtract32>;
+
+//===----------------------------------------------------------------------===//
+// Instructions specific format
+//===----------------------------------------------------------------------===//
+// Shifts
+class LogicR_shift_rotate_imm64<bits<6> func, bits<5> _rs, string instr_asm,
+ SDNode OpNode, PatFrag PF>:
+ FR<0x00, func, (outs CPU64Regs:$dst), (ins CPU64Regs:$b, shamt_64:$c),
+ !strconcat(instr_asm, "\t$dst, $b, $c"),
+ [(set CPU64Regs:$dst, (OpNode CPU64Regs:$b, (i64 PF:$c)))],
+ IIAlu> {
+ let rs = _rs;
+}
+
+class LogicR_shift_rotate_reg64<bits<6> func, bits<5> _shamt, string instr_asm,
+ SDNode OpNode>:
+ FR<0x00, func, (outs CPU64Regs:$dst), (ins CPU64Regs:$c, CPU64Regs:$b),
+ !strconcat(instr_asm, "\t$dst, $b, $c"),
+ [(set CPU64Regs:$dst, (OpNode CPU64Regs:$b, CPU64Regs:$c))], IIAlu> {
+ let shamt = _shamt;
+}
+
+// Mul, Div
+let Defs = [HI64, LO64] in {
+ let isCommutable = 1 in
+ class Mul64<bits<6> func, string instr_asm, InstrItinClass itin>:
+ FR<0x00, func, (outs), (ins CPU64Regs:$a, CPU64Regs:$b),
+ !strconcat(instr_asm, "\t$a, $b"), [], itin>;
+
+ class Div64<SDNode op, bits<6> func, string instr_asm, InstrItinClass itin>:
+ FR<0x00, func, (outs), (ins CPU64Regs:$a, CPU64Regs:$b),
+ !strconcat(instr_asm, "\t$$zero, $a, $b"),
+ [(op CPU64Regs:$a, CPU64Regs:$b)], itin>;
+}
+
+// Move from Hi/Lo
+let shamt = 0 in {
+let rs = 0, rt = 0 in
+class MoveFromLOHI64<bits<6> func, string instr_asm>:
+ FR<0x00, func, (outs CPU64Regs:$dst), (ins),
+ !strconcat(instr_asm, "\t$dst"), [], IIHiLo>;
+
+let rt = 0, rd = 0 in
+class MoveToLOHI64<bits<6> func, string instr_asm>:
+ FR<0x00, func, (outs), (ins CPU64Regs:$src),
+ !strconcat(instr_asm, "\t$src"), [], IIHiLo>;
+}
+
+// Count Leading Ones/Zeros in Word
+class CountLeading64<bits<6> func, string instr_asm, list<dag> pattern>:
+ FR<0x1c, func, (outs CPU64Regs:$dst), (ins CPU64Regs:$src),
+ !strconcat(instr_asm, "\t$dst, $src"), pattern, IIAlu>,
+ Requires<[HasBitCount]> {
+ let shamt = 0;
+ let rt = rd;
+}
+
+//===----------------------------------------------------------------------===//
+// Instruction definition
+//===----------------------------------------------------------------------===//
+
+/// Arithmetic Instructions (ALU Immediate)
+def DADDiu : ArithLogicI<0x19, "daddiu", add, simm16_64, immSExt16,
+ CPU64Regs>;
+def DANDi : ArithLogicI<0x0c, "andi", and, uimm16_64, immZExt16, CPU64Regs>;
+def SLTi64 : SetCC_I<0x0a, "slti", setlt, simm16_64, immSExt16, CPU64Regs>;
+def SLTiu64 : SetCC_I<0x0b, "sltiu", setult, simm16_64, immSExt16, CPU64Regs>;
+def ORi64 : ArithLogicI<0x0d, "ori", or, uimm16_64, immZExt16, CPU64Regs>;
+def XORi64 : ArithLogicI<0x0e, "xori", xor, uimm16_64, immZExt16, CPU64Regs>;
+
+/// Arithmetic Instructions (3-Operand, R-Type)
+def DADDu : ArithLogicR<0x00, 0x2d, "daddu", add, IIAlu, CPU64Regs, 1>;
+def DSUBu : ArithLogicR<0x00, 0x2f, "dsubu", sub, IIAlu, CPU64Regs>;
+def SLT64 : SetCC_R<0x00, 0x2a, "slt", setlt, CPU64Regs>;
+def SLTu64 : SetCC_R<0x00, 0x2b, "sltu", setult, CPU64Regs>;
+def AND64 : ArithLogicR<0x00, 0x24, "and", and, IIAlu, CPU64Regs, 1>;
+def OR64 : ArithLogicR<0x00, 0x25, "or", or, IIAlu, CPU64Regs, 1>;
+def XOR64 : ArithLogicR<0x00, 0x26, "xor", xor, IIAlu, CPU64Regs, 1>;
+def NOR64 : LogicNOR<0x00, 0x27, "nor", CPU64Regs>;
+
+/// Shift Instructions
+def DSLL : LogicR_shift_rotate_imm64<0x38, 0x00, "dsll", shl, immZExt5>;
+def DSRL : LogicR_shift_rotate_imm64<0x3a, 0x00, "dsrl", srl, immZExt5>;
+def DSRA : LogicR_shift_rotate_imm64<0x3b, 0x00, "dsra", sra, immZExt5>;
+def DSLL32 : LogicR_shift_rotate_imm64<0x3c, 0x00, "dsll32", shl, imm32_63>;
+def DSRL32 : LogicR_shift_rotate_imm64<0x3e, 0x00, "dsrl32", srl, imm32_63>;
+def DSRA32 : LogicR_shift_rotate_imm64<0x3f, 0x00, "dsra32", sra, imm32_63>;
+def DSLLV : LogicR_shift_rotate_reg64<0x24, 0x00, "dsllv", shl>;
+def DSRLV : LogicR_shift_rotate_reg64<0x26, 0x00, "dsrlv", srl>;
+def DSRAV : LogicR_shift_rotate_reg64<0x27, 0x00, "dsrav", sra>;
+
+// Rotate Instructions
+let Predicates = [HasMips64r2] in {
+ def DROTR : LogicR_shift_rotate_imm64<0x3a, 0x01, "drotr", rotr, immZExt5>;
+ def DROTR32 : LogicR_shift_rotate_imm64<0x3e, 0x01, "drotr32", rotr,
+ imm32_63>;
+ def DROTRV : LogicR_shift_rotate_reg64<0x16, 0x01, "drotrv", rotr>;
+}
+
+/// Load and Store Instructions
+/// aligned
+defm LB64 : LoadM64<0x20, "lb", sextloadi8>;
+defm LBu64 : LoadM64<0x24, "lbu", zextloadi8>;
+defm LH64 : LoadM64<0x21, "lh", sextloadi16_a>;
+defm LHu64 : LoadM64<0x25, "lhu", zextloadi16_a>;
+defm LW64 : LoadM64<0x23, "lw", sextloadi32_a>;
+defm LWu64 : LoadM64<0x27, "lwu", zextloadi32_a>;
+defm SB64 : StoreM64<0x28, "sb", truncstorei8>;
+defm SH64 : StoreM64<0x29, "sh", truncstorei16_a>;
+defm SW64 : StoreM64<0x2b, "sw", truncstorei32_a>;
+defm LD : LoadM64<0x37, "ld", load_a>;
+defm SD : StoreM64<0x3f, "sd", store_a>;
+
+/// unaligned
+defm ULH64 : LoadM64<0x21, "ulh", sextloadi16_u, 1>;
+defm ULHu64 : LoadM64<0x25, "ulhu", zextloadi16_u, 1>;
+defm ULW64 : LoadM64<0x23, "ulw", sextloadi32_u, 1>;
+defm USH64 : StoreM64<0x29, "ush", truncstorei16_u, 1>;
+defm USW64 : StoreM64<0x2b, "usw", truncstorei32_u, 1>;
+defm ULD : LoadM64<0x37, "uld", load_u, 1>;
+defm USD : StoreM64<0x3f, "usd", store_u, 1>;
+
+/// Jump and Branch Instructions
+def BEQ64 : CBranch<0x04, "beq", seteq, CPU64Regs>;
+def BNE64 : CBranch<0x05, "bne", setne, CPU64Regs>;
+def BGEZ64 : CBranchZero<0x01, 1, "bgez", setge, CPU64Regs>;
+def BGTZ64 : CBranchZero<0x07, 0, "bgtz", setgt, CPU64Regs>;
+def BLEZ64 : CBranchZero<0x07, 0, "blez", setle, CPU64Regs>;
+def BLTZ64 : CBranchZero<0x01, 0, "bltz", setlt, CPU64Regs>;
+
+/// Multiply and Divide Instructions.
+def DMULT : Mul64<0x1c, "dmult", IIImul>;
+def DMULTu : Mul64<0x1d, "dmultu", IIImul>;
+def DSDIV : Div64<MipsDivRem, 0x1e, "ddiv", IIIdiv>;
+def DUDIV : Div64<MipsDivRemU, 0x1f, "ddivu", IIIdiv>;
+
+let Defs = [HI64] in
+ def MTHI64 : MoveToLOHI64<0x11, "mthi">;
+let Defs = [LO64] in
+ def MTLO64 : MoveToLOHI64<0x13, "mtlo">;
+
+let Uses = [HI64] in
+ def MFHI64 : MoveFromLOHI64<0x10, "mfhi">;
+let Uses = [LO64] in
+ def MFLO64 : MoveFromLOHI64<0x12, "mflo">;
+
+/// Count Leading
+def DCLZ : CountLeading64<0x24, "dclz",
+ [(set CPU64Regs:$dst, (ctlz CPU64Regs:$src))]>;
+def DCLO : CountLeading64<0x25, "dclo",
+ [(set CPU64Regs:$dst, (ctlz (not CPU64Regs:$src)))]>;
+
+//===----------------------------------------------------------------------===//
+// Arbitrary patterns that map to one or more instructions
+//===----------------------------------------------------------------------===//
+
+// Small immediates
+def : Pat<(i64 immSExt16:$in),
+ (DADDiu ZERO_64, imm:$in)>;
+def : Pat<(i64 immZExt16:$in),
+ (ORi64 ZERO_64, imm:$in)>;
+
+// zextloadi32_u
+def : Pat<(zextloadi32_u addr:$a), (DSRL (DSLL (ULW64_P8 addr:$a), 32), 32)>,
+ Requires<[IsN64]>;
+def : Pat<(zextloadi32_u addr:$a), (DSRL (DSLL (ULW64 addr:$a), 32), 32)>,
+ Requires<[NotN64]>;
+
+// hi/lo relocs
+def : Pat<(i64 (MipsLo tglobaladdr:$in)), (DADDiu ZERO_64, tglobaladdr:$in)>;
+
+defm : BrcondPats<CPU64Regs, BEQ64, BNE64, SLT64, SLTu64, SLTi64, SLTiu64,
+ ZERO_64>;
+
+// setcc patterns
+defm : SeteqPats<CPU64Regs, SLTiu64, XOR64, SLTu64, ZERO_64>;
+defm : SetlePats<CPU64Regs, SLT64, SLTu64>;
+defm : SetgtPats<CPU64Regs, SLT64, SLTu64>;
+defm : SetgePats<CPU64Regs, SLT64, SLTu64>;
+defm : SetgeImmPats<CPU64Regs, SLTi64, SLTiu64>;
diff --git a/contrib/llvm/lib/Target/Mips/MipsAsmPrinter.cpp b/contrib/llvm/lib/Target/Mips/MipsAsmPrinter.cpp
index 69e03bd..0e82681 100644
--- a/contrib/llvm/lib/Target/Mips/MipsAsmPrinter.cpp
+++ b/contrib/llvm/lib/Target/Mips/MipsAsmPrinter.cpp
@@ -18,6 +18,7 @@
#include "MipsInstrInfo.h"
#include "MipsMachineFunction.h"
#include "MipsMCInstLower.h"
+#include "MipsMCSymbolRefExpr.h"
#include "InstPrinter/MipsInstPrinter.h"
#include "llvm/BasicBlock.h"
#include "llvm/Instructions.h"
@@ -25,6 +26,7 @@
#include "llvm/CodeGen/MachineConstantPool.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineMemOperand.h"
#include "llvm/MC/MCStreamer.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCInst.h"
@@ -33,15 +35,22 @@
#include "llvm/Target/TargetData.h"
#include "llvm/Target/TargetLoweringObjectFile.h"
#include "llvm/Target/TargetOptions.h"
-#include "llvm/Target/TargetRegistry.h"
#include "llvm/ADT/SmallString.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/ADT/Twine.h"
+#include "llvm/Support/TargetRegistry.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Analysis/DebugInfo.h"
using namespace llvm;
+static bool isUnalignedLoadStore(unsigned Opc) {
+ return Opc == Mips::ULW || Opc == Mips::ULH || Opc == Mips::ULHu ||
+ Opc == Mips::USW || Opc == Mips::USH ||
+ Opc == Mips::ULW_P8 || Opc == Mips::ULH_P8 || Opc == Mips::ULHu_P8 ||
+ Opc == Mips::USW_P8 || Opc == Mips::USH_P8;
+}
+
void MipsAsmPrinter::EmitInstruction(const MachineInstr *MI) {
SmallString<128> Str;
raw_svector_ostream OS(Str);
@@ -52,8 +61,21 @@ void MipsAsmPrinter::EmitInstruction(const MachineInstr *MI) {
}
MipsMCInstLower MCInstLowering(Mang, *MF, *this);
+ unsigned Opc = MI->getOpcode();
MCInst TmpInst0;
MCInstLowering.Lower(MI, TmpInst0);
+
+ // Enclose unaligned load or store with .macro & .nomacro directives.
+ if (isUnalignedLoadStore(Opc)) {
+ MCInst Directive;
+ Directive.setOpcode(Mips::MACRO);
+ OutStreamer.EmitInstruction(Directive);
+ OutStreamer.EmitInstruction(TmpInst0);
+ Directive.setOpcode(Mips::NOMACRO);
+ OutStreamer.EmitInstruction(Directive);
+ return;
+ }
+
OutStreamer.EmitInstruction(TmpInst0);
}
@@ -180,7 +202,6 @@ void MipsAsmPrinter::emitFrameDirective() {
const char *MipsAsmPrinter::getCurrentABIString() const {
switch (Subtarget->getTargetABI()) {
case MipsSubtarget::O32: return "abi32";
- case MipsSubtarget::O64: return "abiO64";
case MipsSubtarget::N32: return "abiN32";
case MipsSubtarget::N64: return "abi64";
case MipsSubtarget::EABI: return "eabi32"; // TODO: handle eabi64
@@ -304,6 +325,11 @@ void MipsAsmPrinter::printOperand(const MachineInstr *MI, int opNum,
case MipsII::MO_GOTTPREL: O << "%gottprel("; break;
case MipsII::MO_TPREL_HI: O << "%tprel_hi("; break;
case MipsII::MO_TPREL_LO: O << "%tprel_lo("; break;
+ case MipsII::MO_GPOFF_HI: O << "%hi(%neg(%gp_rel("; break;
+ case MipsII::MO_GPOFF_LO: O << "%lo(%neg(%gp_rel("; break;
+ case MipsII::MO_GOT_DISP: O << "%got_disp("; break;
+ case MipsII::MO_GOT_PAGE: O << "%got_page("; break;
+ case MipsII::MO_GOT_OFST: O << "%got_ofst("; break;
}
switch (MO.getType()) {
@@ -424,17 +450,9 @@ void MipsAsmPrinter::PrintDebugValueComment(const MachineInstr *MI,
}
// Force static initialization.
-static MCInstPrinter *createMipsMCInstPrinter(const Target &T,
- unsigned SyntaxVariant,
- const MCAsmInfo &MAI) {
- return new MipsInstPrinter(MAI);
-}
-
extern "C" void LLVMInitializeMipsAsmPrinter() {
RegisterAsmPrinter<MipsAsmPrinter> X(TheMipsTarget);
RegisterAsmPrinter<MipsAsmPrinter> Y(TheMipselTarget);
-
- TargetRegistry::RegisterMCInstPrinter(TheMipsTarget, createMipsMCInstPrinter);
- TargetRegistry::RegisterMCInstPrinter(TheMipselTarget,
- createMipsMCInstPrinter);
+ RegisterAsmPrinter<MipsAsmPrinter> A(TheMips64Target);
+ RegisterAsmPrinter<MipsAsmPrinter> B(TheMips64elTarget);
}
diff --git a/contrib/llvm/lib/Target/Mips/MipsCallingConv.td b/contrib/llvm/lib/Target/Mips/MipsCallingConv.td
index 876f0fc..0ae4ef6 100644
--- a/contrib/llvm/lib/Target/Mips/MipsCallingConv.td
+++ b/contrib/llvm/lib/Target/Mips/MipsCallingConv.td
@@ -31,6 +31,55 @@ def RetCC_MipsO32 : CallingConv<[
]>;
//===----------------------------------------------------------------------===//
+// Mips N32/64 Calling Convention
+//===----------------------------------------------------------------------===//
+
+def CC_MipsN : CallingConv<[
+ // FIXME: Handle byval, complex and float double parameters.
+
+ // Promote i8/i16/i32 arguments to i64.
+ CCIfType<[i8, i16, i32], CCPromoteToType<i64>>,
+
+ // Integer arguments are passed in integer registers.
+ CCIfType<[i64], CCAssignToRegWithShadow<[A0_64, A1_64, A2_64, A3_64,
+ T0_64, T1_64, T2_64, T3_64],
+ [D12_64, D13_64, D14_64, D15_64,
+ D16_64, D17_64, D18_64, D19_64]>>,
+
+ // f32 arguments are passed in single precision FP registers.
+ CCIfType<[f32], CCAssignToRegWithShadow<[F12, F13, F14, F15,
+ F16, F17, F18, F19],
+ [A0_64, A1_64, A2_64, A3_64,
+ T0_64, T1_64, T2_64, T3_64]>>,
+
+ // f64 arguments are passed in double precision FP registers.
+ CCIfType<[f64], CCAssignToRegWithShadow<[D12_64, D13_64, D14_64, D15_64,
+ D16_64, D17_64, D18_64, D19_64],
+ [A0_64, A1_64, A2_64, A3_64,
+ T0_64, T1_64, T2_64, T3_64]>>,
+
+ // All stack parameter slots become 64-bit doublewords and are 8-byte aligned.
+ CCIfType<[i64, f64], CCAssignToStack<8, 8>>,
+ CCIfType<[f32], CCAssignToStack<4, 8>>
+]>;
+
+def RetCC_MipsN : CallingConv<[
+ // FIXME: Handle complex and float double return values.
+
+ // i32 are returned in registers V0, V1
+ CCIfType<[i32], CCAssignToReg<[V0, V1]>>,
+
+ // i64 are returned in registers V0_64, V1_64
+ CCIfType<[i64], CCAssignToReg<[V0_64, V1_64]>>,
+
+ // f32 are returned in registers F0, F2
+ CCIfType<[f32], CCAssignToReg<[F0, F2]>>,
+
+ // f64 are returned in registers D0, D2
+ CCIfType<[f64], CCAssignToReg<[D0_64, D2_64]>>
+]>;
+
+//===----------------------------------------------------------------------===//
// Mips EABI Calling Convention
//===----------------------------------------------------------------------===//
@@ -77,10 +126,14 @@ def RetCC_MipsEABI : CallingConv<[
//===----------------------------------------------------------------------===//
def CC_Mips : CallingConv<[
- CCIfSubtarget<"isABI_EABI()", CCDelegateTo<CC_MipsEABI>>
+ CCIfSubtarget<"isABI_EABI()", CCDelegateTo<CC_MipsEABI>>,
+ CCIfSubtarget<"isABI_N32()", CCDelegateTo<CC_MipsN>>,
+ CCIfSubtarget<"isABI_N64()", CCDelegateTo<CC_MipsN>>
]>;
def RetCC_Mips : CallingConv<[
CCIfSubtarget<"isABI_EABI()", CCDelegateTo<RetCC_MipsEABI>>,
+ CCIfSubtarget<"isABI_N32()", CCDelegateTo<RetCC_MipsN>>,
+ CCIfSubtarget<"isABI_N64()", CCDelegateTo<RetCC_MipsN>>,
CCDelegateTo<RetCC_MipsO32>
]>;
diff --git a/contrib/llvm/lib/Target/Mips/MipsCodeEmitter.cpp b/contrib/llvm/lib/Target/Mips/MipsCodeEmitter.cpp
new file mode 100644
index 0000000..9220d9c
--- /dev/null
+++ b/contrib/llvm/lib/Target/Mips/MipsCodeEmitter.cpp
@@ -0,0 +1,245 @@
+//===-- Mips/MipsCodeEmitter.cpp - Convert Mips code to machine code -----===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===---------------------------------------------------------------------===//
+//
+// This file contains the pass that transforms the Mips machine instructions
+// into relocatable machine code.
+//
+//===---------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "jit"
+#include "Mips.h"
+#include "MipsInstrInfo.h"
+#include "MipsRelocations.h"
+#include "MipsSubtarget.h"
+#include "MipsTargetMachine.h"
+#include "llvm/Constants.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/Function.h"
+#include "llvm/PassManager.h"
+#include "llvm/CodeGen/JITCodeEmitter.h"
+#include "llvm/CodeGen/MachineConstantPool.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineJumpTableInfo.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+#ifndef NDEBUG
+#include <iomanip>
+#endif
+
+#include "llvm/CodeGen/MachineOperand.h"
+
+using namespace llvm;
+
+STATISTIC(NumEmitted, "Number of machine instructions emitted");
+
+namespace {
+
+class MipsCodeEmitter : public MachineFunctionPass {
+ MipsJITInfo *JTI;
+ const MipsInstrInfo *II;
+ const TargetData *TD;
+ const MipsSubtarget *Subtarget;
+ TargetMachine &TM;
+ JITCodeEmitter &MCE;
+ const std::vector<MachineConstantPoolEntry> *MCPEs;
+ const std::vector<MachineJumpTableEntry> *MJTEs;
+ bool IsPIC;
+
+ void getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.addRequired<MachineModuleInfo> ();
+ MachineFunctionPass::getAnalysisUsage(AU);
+ }
+
+ static char ID;
+
+ public:
+ MipsCodeEmitter(TargetMachine &tm, JITCodeEmitter &mce) :
+ MachineFunctionPass(ID), JTI(0),
+ II((const MipsInstrInfo *) tm.getInstrInfo()),
+ TD(tm.getTargetData()), TM(tm), MCE(mce), MCPEs(0), MJTEs(0),
+ IsPIC(TM.getRelocationModel() == Reloc::PIC_) {
+ }
+
+ bool runOnMachineFunction(MachineFunction &MF);
+
+ virtual const char *getPassName() const {
+ return "Mips Machine Code Emitter";
+ }
+
+ /// getBinaryCodeForInstr - This function, generated by the
+ /// CodeEmitterGenerator using TableGen, produces the binary encoding for
+ /// machine instructions.
+ unsigned getBinaryCodeForInstr(const MachineInstr &MI) const;
+
+ void emitInstruction(const MachineInstr &MI);
+
+ private:
+
+ void emitWordLE(unsigned Word);
+
+ /// Routines that handle operands which add machine relocations which are
+ /// fixed up by the relocation stage.
+ void emitGlobalAddress(const GlobalValue *GV, unsigned Reloc,
+ bool MayNeedFarStub) const;
+ void emitExternalSymbolAddress(const char *ES, unsigned Reloc) const;
+ void emitConstPoolAddress(unsigned CPI, unsigned Reloc) const;
+ void emitJumpTableAddress(unsigned JTIndex, unsigned Reloc) const;
+ void emitMachineBasicBlock(MachineBasicBlock *BB, unsigned Reloc) const;
+
+ /// getMachineOpValue - Return binary encoding of operand. If the machine
+ /// operand requires relocation, record the relocation and return zero.
+ unsigned getMachineOpValue(const MachineInstr &MI,
+ const MachineOperand &MO) const;
+
+ unsigned getRelocation(const MachineInstr &MI,
+ const MachineOperand &MO) const;
+
+ };
+}
+
+char MipsCodeEmitter::ID = 0;
+
+bool MipsCodeEmitter::runOnMachineFunction(MachineFunction &MF) {
+ JTI = ((MipsTargetMachine&) MF.getTarget()).getJITInfo();
+ II = ((const MipsTargetMachine&) MF.getTarget()).getInstrInfo();
+ TD = ((const MipsTargetMachine&) MF.getTarget()).getTargetData();
+ Subtarget = &TM.getSubtarget<MipsSubtarget> ();
+ MCPEs = &MF.getConstantPool()->getConstants();
+ MJTEs = 0;
+ if (MF.getJumpTableInfo()) MJTEs = &MF.getJumpTableInfo()->getJumpTables();
+ JTI->Initialize(MF, IsPIC);
+ MCE.setModuleInfo(&getAnalysis<MachineModuleInfo> ());
+
+ do {
+ DEBUG(errs() << "JITTing function '"
+ << MF.getFunction()->getName() << "'\n");
+ MCE.startFunction(MF);
+
+ for (MachineFunction::iterator MBB = MF.begin(), E = MF.end();
+ MBB != E; ++MBB){
+ MCE.StartMachineBasicBlock(MBB);
+ for (MachineBasicBlock::const_iterator I = MBB->begin(), E = MBB->end();
+ I != E; ++I)
+ emitInstruction(*I);
+ }
+ } while (MCE.finishFunction(MF));
+
+ return false;
+}
+
+unsigned MipsCodeEmitter::getRelocation(const MachineInstr &MI,
+ const MachineOperand &MO) const {
+ // NOTE: This relocations are for static.
+ uint64_t TSFlags = MI.getDesc().TSFlags;
+ uint64_t Form = TSFlags & MipsII::FormMask;
+ if (Form == MipsII::FrmJ)
+ return Mips::reloc_mips_26;
+ if ((Form == MipsII::FrmI || Form == MipsII::FrmFI)
+ && MI.getDesc().isBranch())
+ return Mips::reloc_mips_branch;
+ if (Form == MipsII::FrmI && MI.getOpcode() == Mips::LUi)
+ return Mips::reloc_mips_hi;
+ return Mips::reloc_mips_lo;
+}
+
+/// getMachineOpValue - Return binary encoding of operand. If the machine
+/// operand requires relocation, record the relocation and return zero.
+unsigned MipsCodeEmitter::getMachineOpValue(const MachineInstr &MI,
+ const MachineOperand &MO) const {
+ if (MO.isReg())
+ return MipsRegisterInfo::getRegisterNumbering(MO.getReg());
+ else if (MO.isImm())
+ return static_cast<unsigned>(MO.getImm());
+ else if (MO.isGlobal())
+ emitGlobalAddress(MO.getGlobal(), getRelocation(MI, MO), true);
+ else if (MO.isSymbol())
+ emitExternalSymbolAddress(MO.getSymbolName(), getRelocation(MI, MO));
+ else if (MO.isCPI())
+ emitConstPoolAddress(MO.getIndex(), getRelocation(MI, MO));
+ else if (MO.isJTI())
+ emitJumpTableAddress(MO.getIndex(), getRelocation(MI, MO));
+ else if (MO.isMBB())
+ emitMachineBasicBlock(MO.getMBB(), getRelocation(MI, MO));
+ else
+ llvm_unreachable("Unable to encode MachineOperand!");
+ return 0;
+}
+
+void MipsCodeEmitter::emitGlobalAddress(const GlobalValue *GV, unsigned Reloc,
+ bool MayNeedFarStub) const {
+ MCE.addRelocation(MachineRelocation::getGV(MCE.getCurrentPCOffset(), Reloc,
+ const_cast<GlobalValue *>(GV), 0, MayNeedFarStub));
+}
+
+void MipsCodeEmitter::
+emitExternalSymbolAddress(const char *ES, unsigned Reloc) const {
+ MCE.addRelocation(MachineRelocation::getExtSym(MCE.getCurrentPCOffset(),
+ Reloc, ES, 0, 0, false));
+}
+
+void MipsCodeEmitter::emitConstPoolAddress(unsigned CPI, unsigned Reloc) const {
+ MCE.addRelocation(MachineRelocation::getConstPool(MCE.getCurrentPCOffset(),
+ Reloc, CPI, 0, false));
+}
+
+void MipsCodeEmitter::
+emitJumpTableAddress(unsigned JTIndex, unsigned Reloc) const {
+ MCE.addRelocation(MachineRelocation::getJumpTable(MCE.getCurrentPCOffset(),
+ Reloc, JTIndex, 0, false));
+}
+
+void MipsCodeEmitter::emitMachineBasicBlock(MachineBasicBlock *BB,
+ unsigned Reloc) const {
+ MCE.addRelocation(MachineRelocation::getBB(MCE.getCurrentPCOffset(),
+ Reloc, BB));
+}
+
+void MipsCodeEmitter::emitInstruction(const MachineInstr &MI) {
+ DEBUG(errs() << "JIT: " << (void*)MCE.getCurrentPCValue() << ":\t" << MI);
+
+ MCE.processDebugLoc(MI.getDebugLoc(), true);
+
+ // Skip pseudo instructions.
+ if ((MI.getDesc().TSFlags & MipsII::FormMask) == MipsII::Pseudo)
+ return;
+
+ ++NumEmitted; // Keep track of the # of mi's emitted
+
+ switch (MI.getOpcode()) {
+ default:
+ emitWordLE(getBinaryCodeForInstr(MI));
+ break;
+ }
+
+ MCE.processDebugLoc(MI.getDebugLoc(), false);
+}
+
+void MipsCodeEmitter::emitWordLE(unsigned Word) {
+ DEBUG(errs() << " 0x";
+ errs().write_hex(Word) << "\n");
+ MCE.emitWordLE(Word);
+}
+
+/// createMipsJITCodeEmitterPass - Return a pass that emits the collected Mips
+/// code to the specified MCE object.
+FunctionPass *llvm::createMipsJITCodeEmitterPass(MipsTargetMachine &TM,
+ JITCodeEmitter &JCE) {
+ return new MipsCodeEmitter(TM, JCE);
+}
+
+unsigned MipsCodeEmitter::getBinaryCodeForInstr(const MachineInstr &MI) const {
+ // this function will be automatically generated by the CodeEmitterGenerator
+ // using TableGen
+ return 0;
+}
diff --git a/contrib/llvm/lib/Target/Mips/MipsDelaySlotFiller.cpp b/contrib/llvm/lib/Target/Mips/MipsDelaySlotFiller.cpp
index c3a6211..be3b7a0 100644
--- a/contrib/llvm/lib/Target/Mips/MipsDelaySlotFiller.cpp
+++ b/contrib/llvm/lib/Target/Mips/MipsDelaySlotFiller.cpp
@@ -7,7 +7,7 @@
//
//===----------------------------------------------------------------------===//
//
-// Simple pass to fills delay slots with NOPs.
+// Simple pass to fills delay slots with useful instructions.
//
//===----------------------------------------------------------------------===//
@@ -17,18 +17,31 @@
#include "MipsTargetMachine.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/Statistic.h"
using namespace llvm;
STATISTIC(FilledSlots, "Number of delay slots filled");
+STATISTIC(UsefulSlots, "Number of delay slots filled with instructions that"
+ " are not NOP.");
+
+static cl::opt<bool> EnableDelaySlotFiller(
+ "enable-mips-delay-filler",
+ cl::init(false),
+ cl::desc("Fill the Mips delay slots useful instructions."),
+ cl::Hidden);
namespace {
struct Filler : public MachineFunctionPass {
TargetMachine &TM;
const TargetInstrInfo *TII;
+ MachineBasicBlock::iterator LastFiller;
static char ID;
Filler(TargetMachine &tm)
@@ -47,31 +60,61 @@ namespace {
return Changed;
}
+ bool isDelayFiller(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator candidate);
+
+ void insertCallUses(MachineBasicBlock::iterator MI,
+ SmallSet<unsigned, 32>& RegDefs,
+ SmallSet<unsigned, 32>& RegUses);
+
+ void insertDefsUses(MachineBasicBlock::iterator MI,
+ SmallSet<unsigned, 32>& RegDefs,
+ SmallSet<unsigned, 32>& RegUses);
+
+ bool IsRegInSet(SmallSet<unsigned, 32>& RegSet,
+ unsigned Reg);
+
+ bool delayHasHazard(MachineBasicBlock::iterator candidate,
+ bool &sawLoad, bool &sawStore,
+ SmallSet<unsigned, 32> &RegDefs,
+ SmallSet<unsigned, 32> &RegUses);
+
+ bool
+ findDelayInstr(MachineBasicBlock &MBB, MachineBasicBlock::iterator slot,
+ MachineBasicBlock::iterator &Filler);
+
+
};
char Filler::ID = 0;
} // end of anonymous namespace
/// runOnMachineBasicBlock - Fill in delay slots for the given basic block.
-/// Currently, we fill delay slots with NOPs. We assume there is only one
-/// delay slot per delayed instruction.
+/// We assume there is only one delay slot per delayed instruction.
bool Filler::
-runOnMachineBasicBlock(MachineBasicBlock &MBB)
-{
+runOnMachineBasicBlock(MachineBasicBlock &MBB) {
bool Changed = false;
- for (MachineBasicBlock::iterator I = MBB.begin(); I != MBB.end(); ++I) {
- const MCInstrDesc& MCid = I->getDesc();
- if (MCid.hasDelaySlot() &&
- (TM.getSubtarget<MipsSubtarget>().isMips1() ||
- MCid.isCall() || MCid.isBranch() || MCid.isReturn())) {
- MachineBasicBlock::iterator J = I;
- ++J;
- BuildMI(MBB, J, I->getDebugLoc(), TII->get(Mips::NOP));
+ LastFiller = MBB.end();
+
+ for (MachineBasicBlock::iterator I = MBB.begin(); I != MBB.end(); ++I)
+ if (I->getDesc().hasDelaySlot()) {
++FilledSlots;
Changed = true;
- }
- }
+ MachineBasicBlock::iterator D;
+
+ if (EnableDelaySlotFiller && findDelayInstr(MBB, I, D)) {
+ MBB.splice(llvm::next(I), &MBB, D);
+ ++UsefulSlots;
+ }
+ else
+ BuildMI(MBB, llvm::next(I), I->getDebugLoc(), TII->get(Mips::NOP));
+
+ // Record the filler instruction that filled the delay slot.
+ // The instruction after it will be visited in the next iteration.
+ LastFiller = ++I;
+ }
return Changed;
+
}
/// createMipsDelaySlotFillerPass - Returns a pass that fills in delay
@@ -80,3 +123,134 @@ FunctionPass *llvm::createMipsDelaySlotFillerPass(MipsTargetMachine &tm) {
return new Filler(tm);
}
+bool Filler::findDelayInstr(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator slot,
+ MachineBasicBlock::iterator &Filler) {
+ SmallSet<unsigned, 32> RegDefs;
+ SmallSet<unsigned, 32> RegUses;
+
+ insertDefsUses(slot, RegDefs, RegUses);
+
+ bool sawLoad = false;
+ bool sawStore = false;
+
+ for (MachineBasicBlock::reverse_iterator I(slot); I != MBB.rend(); ++I) {
+ // skip debug value
+ if (I->isDebugValue())
+ continue;
+
+ // Convert to forward iterator.
+ MachineBasicBlock::iterator FI(llvm::next(I).base());
+
+ if (I->hasUnmodeledSideEffects()
+ || I->isInlineAsm()
+ || I->isLabel()
+ || FI == LastFiller
+ || I->getDesc().isPseudo()
+ //
+ // Should not allow:
+ // ERET, DERET or WAIT, PAUSE. Need to add these to instruction
+ // list. TBD.
+ )
+ break;
+
+ if (delayHasHazard(FI, sawLoad, sawStore, RegDefs, RegUses)) {
+ insertDefsUses(FI, RegDefs, RegUses);
+ continue;
+ }
+
+ Filler = FI;
+ return true;
+ }
+
+ return false;
+}
+
+bool Filler::delayHasHazard(MachineBasicBlock::iterator candidate,
+ bool &sawLoad,
+ bool &sawStore,
+ SmallSet<unsigned, 32> &RegDefs,
+ SmallSet<unsigned, 32> &RegUses) {
+ if (candidate->isImplicitDef() || candidate->isKill())
+ return true;
+
+ MCInstrDesc MCID = candidate->getDesc();
+ // Loads or stores cannot be moved past a store to the delay slot
+ // and stores cannot be moved past a load.
+ if (MCID.mayLoad()) {
+ if (sawStore)
+ return true;
+ sawLoad = true;
+ }
+
+ if (MCID.mayStore()) {
+ if (sawStore)
+ return true;
+ sawStore = true;
+ if (sawLoad)
+ return true;
+ }
+
+ assert((!MCID.isCall() && !MCID.isReturn()) &&
+ "Cannot put calls or returns in delay slot.");
+
+ for (unsigned i = 0, e = candidate->getNumOperands(); i!= e; ++i) {
+ const MachineOperand &MO = candidate->getOperand(i);
+ unsigned Reg;
+
+ if (!MO.isReg() || !(Reg = MO.getReg()))
+ continue; // skip
+
+ if (MO.isDef()) {
+ // check whether Reg is defined or used before delay slot.
+ if (IsRegInSet(RegDefs, Reg) || IsRegInSet(RegUses, Reg))
+ return true;
+ }
+ if (MO.isUse()) {
+ // check whether Reg is defined before delay slot.
+ if (IsRegInSet(RegDefs, Reg))
+ return true;
+ }
+ }
+ return false;
+}
+
+// Insert Defs and Uses of MI into the sets RegDefs and RegUses.
+void Filler::insertDefsUses(MachineBasicBlock::iterator MI,
+ SmallSet<unsigned, 32>& RegDefs,
+ SmallSet<unsigned, 32>& RegUses) {
+ // If MI is a call or return, just examine the explicit non-variadic operands.
+ MCInstrDesc MCID = MI->getDesc();
+ unsigned e = MCID.isCall() || MCID.isReturn() ? MCID.getNumOperands() :
+ MI->getNumOperands();
+
+ // Add RA to RegDefs to prevent users of RA from going into delay slot.
+ if (MCID.isCall())
+ RegDefs.insert(Mips::RA);
+
+ for (unsigned i = 0; i != e; ++i) {
+ const MachineOperand &MO = MI->getOperand(i);
+ unsigned Reg;
+
+ if (!MO.isReg() || !(Reg = MO.getReg()))
+ continue;
+
+ if (MO.isDef())
+ RegDefs.insert(Reg);
+ else if (MO.isUse())
+ RegUses.insert(Reg);
+ }
+}
+
+//returns true if the Reg or its alias is in the RegSet.
+bool Filler::IsRegInSet(SmallSet<unsigned, 32>& RegSet, unsigned Reg) {
+ if (RegSet.count(Reg))
+ return true;
+ // check Aliased Registers
+ for (const unsigned *Alias = TM.getRegisterInfo()->getAliasSet(Reg);
+ *Alias; ++Alias)
+ if (RegSet.count(*Alias))
+ return true;
+
+ return false;
+}
diff --git a/contrib/llvm/lib/Target/Mips/MipsFrameLowering.cpp b/contrib/llvm/lib/Target/Mips/MipsFrameLowering.cpp
index a0f90a0..22d1e47 100644
--- a/contrib/llvm/lib/Target/Mips/MipsFrameLowering.cpp
+++ b/contrib/llvm/lib/Target/Mips/MipsFrameLowering.cpp
@@ -254,9 +254,15 @@ void MipsFrameLowering::emitPrologue(MachineFunction &MF) const {
}
// Restore GP from the saved stack location
- if (MipsFI->needGPSaveRestore())
- BuildMI(MBB, MBBI, dl, TII.get(Mips::CPRESTORE))
- .addImm(MFI->getObjectOffset(MipsFI->getGPFI()));
+ if (MipsFI->needGPSaveRestore()) {
+ unsigned Offset = MFI->getObjectOffset(MipsFI->getGPFI());
+ BuildMI(MBB, MBBI, dl, TII.get(Mips::CPRESTORE)).addImm(Offset);
+
+ if (Offset >= 0x8000) {
+ BuildMI(MBB, llvm::prior(MBBI), dl, TII.get(Mips::MACRO));
+ BuildMI(MBB, MBBI, dl, TII.get(Mips::NOMACRO));
+ }
+ }
}
void MipsFrameLowering::emitEpilogue(MachineFunction &MF,
@@ -300,13 +306,6 @@ void MipsFrameLowering::emitEpilogue(MachineFunction &MF,
}
}
-void
-MipsFrameLowering::getInitialFrameState(std::vector<MachineMove> &Moves) const {
- MachineLocation Dst(MachineLocation::VirtualFP);
- MachineLocation Src(Mips::SP, 0);
- Moves.push_back(MachineMove(0, Dst, Src));
-}
-
void MipsFrameLowering::
processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
RegScavenger *RS) const {
diff --git a/contrib/llvm/lib/Target/Mips/MipsFrameLowering.h b/contrib/llvm/lib/Target/Mips/MipsFrameLowering.h
index 78c78ee..c249756 100644
--- a/contrib/llvm/lib/Target/Mips/MipsFrameLowering.h
+++ b/contrib/llvm/lib/Target/Mips/MipsFrameLowering.h
@@ -27,7 +27,8 @@ protected:
public:
explicit MipsFrameLowering(const MipsSubtarget &sti)
- : TargetFrameLowering(StackGrowsDown, 8, 0), STI(sti) {
+ : TargetFrameLowering(StackGrowsDown, sti.hasMips64() ? 16 : 8, 0),
+ STI(sti) {
}
bool targetHandlesStackFrameRounding() const;
@@ -39,8 +40,6 @@ public:
bool hasFP(const MachineFunction &MF) const;
- void getInitialFrameState(std::vector<MachineMove> &Moves) const;
-
void processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
RegScavenger *RS) const;
};
diff --git a/contrib/llvm/lib/Target/Mips/MipsISelDAGToDAG.cpp b/contrib/llvm/lib/Target/Mips/MipsISelDAGToDAG.cpp
index 90aaeb6..9c831ed 100644
--- a/contrib/llvm/lib/Target/Mips/MipsISelDAGToDAG.cpp
+++ b/contrib/llvm/lib/Target/Mips/MipsISelDAGToDAG.cpp
@@ -86,9 +86,6 @@ private:
// Complex Pattern.
bool SelectAddr(SDValue N, SDValue &Base, SDValue &Offset);
- SDNode *SelectLoadFp64(SDNode *N);
- SDNode *SelectStoreFp64(SDNode *N);
-
// getI32Imm - Return a target constant with the specified
// value, of type i32.
inline SDValue getI32Imm(unsigned Imm) {
@@ -114,17 +111,20 @@ SDNode *MipsDAGToDAGISel::getGlobalBaseReg() {
/// Used on Mips Load/Store instructions
bool MipsDAGToDAGISel::
SelectAddr(SDValue Addr, SDValue &Base, SDValue &Offset) {
+ EVT ValTy = Addr.getValueType();
+ unsigned GPReg = ValTy == MVT::i32 ? Mips::GP : Mips::GP_64;
+
// if Address is FI, get the TargetFrameIndex.
if (FrameIndexSDNode *FIN = dyn_cast<FrameIndexSDNode>(Addr)) {
- Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), MVT::i32);
- Offset = CurDAG->getTargetConstant(0, MVT::i32);
+ Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), ValTy);
+ Offset = CurDAG->getTargetConstant(0, ValTy);
return true;
}
// on PIC code Load GA
if (TM.getRelocationModel() == Reloc::PIC_) {
if (Addr.getOpcode() == MipsISD::WrapperPIC) {
- Base = CurDAG->getRegister(Mips::GP, MVT::i32);
+ Base = CurDAG->getRegister(GPReg, ValTy);
Offset = Addr.getOperand(0);
return true;
}
@@ -133,7 +133,7 @@ SelectAddr(SDValue Addr, SDValue &Base, SDValue &Offset) {
Addr.getOpcode() == ISD::TargetGlobalAddress))
return false;
else if (Addr.getOpcode() == ISD::TargetGlobalTLSAddress) {
- Base = CurDAG->getRegister(Mips::GP, MVT::i32);
+ Base = CurDAG->getRegister(GPReg, ValTy);
Offset = Addr;
return true;
}
@@ -147,11 +147,11 @@ SelectAddr(SDValue Addr, SDValue &Base, SDValue &Offset) {
// If the first operand is a FI, get the TargetFI Node
if (FrameIndexSDNode *FIN = dyn_cast<FrameIndexSDNode>
(Addr.getOperand(0)))
- Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), MVT::i32);
+ Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), ValTy);
else
Base = Addr.getOperand(0);
- Offset = CurDAG->getTargetConstant(CN->getZExtValue(), MVT::i32);
+ Offset = CurDAG->getTargetConstant(CN->getZExtValue(), ValTy);
return true;
}
}
@@ -180,134 +180,10 @@ SelectAddr(SDValue Addr, SDValue &Base, SDValue &Offset) {
}
Base = Addr;
- Offset = CurDAG->getTargetConstant(0, MVT::i32);
+ Offset = CurDAG->getTargetConstant(0, ValTy);
return true;
}
-SDNode *MipsDAGToDAGISel::SelectLoadFp64(SDNode *N) {
- MVT::SimpleValueType NVT =
- N->getValueType(0).getSimpleVT().SimpleTy;
-
- if (!Subtarget.isMips1() || NVT != MVT::f64)
- return NULL;
-
- LoadSDNode *LN = cast<LoadSDNode>(N);
- if (LN->getExtensionType() != ISD::NON_EXTLOAD ||
- LN->getAddressingMode() != ISD::UNINDEXED)
- return NULL;
-
- SDValue Chain = N->getOperand(0);
- SDValue N1 = N->getOperand(1);
- SDValue Offset0, Offset1, Base;
-
- if (!SelectAddr(N1, Base, Offset0) ||
- N1.getValueType() != MVT::i32)
- return NULL;
-
- MachineSDNode::mmo_iterator MemRefs0 = MF->allocateMemRefsArray(1);
- MemRefs0[0] = cast<MemSDNode>(N)->getMemOperand();
- DebugLoc dl = N->getDebugLoc();
-
- // The second load should start after for 4 bytes.
- if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Offset0))
- Offset1 = CurDAG->getTargetConstant(C->getSExtValue()+4, MVT::i32);
- else if (ConstantPoolSDNode *CP = dyn_cast<ConstantPoolSDNode>(Offset0))
- Offset1 = CurDAG->getTargetConstantPool(CP->getConstVal(),
- MVT::i32,
- CP->getAlignment(),
- CP->getOffset()+4,
- CP->getTargetFlags());
- else
- return NULL;
-
- // Choose the offsets depending on the endianess
- if (TM.getTargetData()->isBigEndian())
- std::swap(Offset0, Offset1);
-
- // Instead of:
- // ldc $f0, X($3)
- // Generate:
- // lwc $f0, X($3)
- // lwc $f1, X+4($3)
- SDNode *LD0 = CurDAG->getMachineNode(Mips::LWC1, dl, MVT::f32,
- MVT::Other, Base, Offset0, Chain);
- SDValue Undef = SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF,
- dl, NVT), 0);
- SDValue I0 = CurDAG->getTargetInsertSubreg(Mips::sub_fpeven, dl,
- MVT::f64, Undef, SDValue(LD0, 0));
-
- SDNode *LD1 = CurDAG->getMachineNode(Mips::LWC1, dl, MVT::f32,
- MVT::Other, Base, Offset1, SDValue(LD0, 1));
- SDValue I1 = CurDAG->getTargetInsertSubreg(Mips::sub_fpodd, dl,
- MVT::f64, I0, SDValue(LD1, 0));
-
- ReplaceUses(SDValue(N, 0), I1);
- ReplaceUses(SDValue(N, 1), Chain);
- cast<MachineSDNode>(LD0)->setMemRefs(MemRefs0, MemRefs0 + 1);
- cast<MachineSDNode>(LD1)->setMemRefs(MemRefs0, MemRefs0 + 1);
- return I1.getNode();
-}
-
-SDNode *MipsDAGToDAGISel::SelectStoreFp64(SDNode *N) {
-
- if (!Subtarget.isMips1() ||
- N->getOperand(1).getValueType() != MVT::f64)
- return NULL;
-
- SDValue Chain = N->getOperand(0);
-
- StoreSDNode *SN = cast<StoreSDNode>(N);
- if (SN->isTruncatingStore() || SN->getAddressingMode() != ISD::UNINDEXED)
- return NULL;
-
- SDValue N1 = N->getOperand(1);
- SDValue N2 = N->getOperand(2);
- SDValue Offset0, Offset1, Base;
-
- if (!SelectAddr(N2, Base, Offset0) ||
- N1.getValueType() != MVT::f64 ||
- N2.getValueType() != MVT::i32)
- return NULL;
-
- MachineSDNode::mmo_iterator MemRefs0 = MF->allocateMemRefsArray(1);
- MemRefs0[0] = cast<MemSDNode>(N)->getMemOperand();
- DebugLoc dl = N->getDebugLoc();
-
- // Get the even and odd part from the f64 register
- SDValue FPOdd = CurDAG->getTargetExtractSubreg(Mips::sub_fpodd,
- dl, MVT::f32, N1);
- SDValue FPEven = CurDAG->getTargetExtractSubreg(Mips::sub_fpeven,
- dl, MVT::f32, N1);
-
- // The second store should start after for 4 bytes.
- if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Offset0))
- Offset1 = CurDAG->getTargetConstant(C->getSExtValue()+4, MVT::i32);
- else
- return NULL;
-
- // Choose the offsets depending on the endianess
- if (TM.getTargetData()->isBigEndian())
- std::swap(Offset0, Offset1);
-
- // Instead of:
- // sdc $f0, X($3)
- // Generate:
- // swc $f0, X($3)
- // swc $f1, X+4($3)
- SDValue Ops0[] = { FPEven, Base, Offset0, Chain };
- Chain = SDValue(CurDAG->getMachineNode(Mips::SWC1, dl,
- MVT::Other, Ops0, 4), 0);
- cast<MachineSDNode>(Chain.getNode())->setMemRefs(MemRefs0, MemRefs0 + 1);
-
- SDValue Ops1[] = { FPOdd, Base, Offset1, Chain };
- Chain = SDValue(CurDAG->getMachineNode(Mips::SWC1, dl,
- MVT::Other, Ops1, 4), 0);
- cast<MachineSDNode>(Chain.getNode())->setMemRefs(MemRefs0, MemRefs0 + 1);
-
- ReplaceUses(SDValue(N, 0), Chain);
- return Chain.getNode();
-}
-
/// Select instructions not customized! Used for
/// expanded, promoted and normal instructions
SDNode* MipsDAGToDAGISel::Select(SDNode *Node) {
@@ -364,6 +240,8 @@ SDNode* MipsDAGToDAGISel::Select(SDNode *Node) {
/// Mul with two results
case ISD::SMUL_LOHI:
case ISD::UMUL_LOHI: {
+ assert(Node->getValueType(0) != MVT::i64 &&
+ "64-bit multiplication with two results not handled.");
SDValue Op1 = Node->getOperand(0);
SDValue Op2 = Node->getOperand(1);
@@ -389,21 +267,29 @@ SDNode* MipsDAGToDAGISel::Select(SDNode *Node) {
/// Special Muls
case ISD::MUL:
- if (Subtarget.isMips32())
+ // Mips32 has a 32-bit three operand mul instruction.
+ if (Subtarget.hasMips32() && Node->getValueType(0) == MVT::i32)
break;
case ISD::MULHS:
case ISD::MULHU: {
+ assert((Opcode == ISD::MUL || Node->getValueType(0) != MVT::i64) &&
+ "64-bit MULH* not handled.");
+ EVT Ty = Node->getValueType(0);
SDValue MulOp1 = Node->getOperand(0);
SDValue MulOp2 = Node->getOperand(1);
- unsigned MulOp = (Opcode == ISD::MULHU ? Mips::MULTu : Mips::MULT);
+ unsigned MulOp = (Opcode == ISD::MULHU ?
+ Mips::MULTu :
+ (Ty == MVT::i32 ? Mips::MULT : Mips::DMULT));
SDNode *MulNode = CurDAG->getMachineNode(MulOp, dl,
MVT::Glue, MulOp1, MulOp2);
SDValue InFlag = SDValue(MulNode, 0);
- if (Opcode == ISD::MUL)
- return CurDAG->getMachineNode(Mips::MFLO, dl, MVT::i32, InFlag);
+ if (Opcode == ISD::MUL) {
+ unsigned Opc = (Ty == MVT::i32 ? Mips::MFLO : Mips::MFLO64);
+ return CurDAG->getMachineNode(Opc, dl, Ty, InFlag);
+ }
else
return CurDAG->getMachineNode(Mips::MFHI, dl, MVT::i32, InFlag);
}
@@ -417,31 +303,12 @@ SDNode* MipsDAGToDAGISel::Select(SDNode *Node) {
if (Node->getValueType(0) == MVT::f64 && CN->isExactlyValue(+0.0)) {
SDValue Zero = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl,
Mips::ZERO, MVT::i32);
- SDValue Undef = SDValue(
- CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, dl, MVT::f64), 0);
- SDNode *MTC = CurDAG->getMachineNode(Mips::MTC1, dl, MVT::f32, Zero);
- SDValue I0 = CurDAG->getTargetInsertSubreg(Mips::sub_fpeven, dl,
- MVT::f64, Undef, SDValue(MTC, 0));
- SDValue I1 = CurDAG->getTargetInsertSubreg(Mips::sub_fpodd, dl,
- MVT::f64, I0, SDValue(MTC, 0));
- ReplaceUses(SDValue(Node, 0), I1);
- return I1.getNode();
+ return CurDAG->getMachineNode(Mips::BuildPairF64, dl, MVT::f64, Zero,
+ Zero);
}
break;
}
- case ISD::LOAD:
- if (SDNode *ResNode = SelectLoadFp64(Node))
- return ResNode;
- // Other cases are autogenerated.
- break;
-
- case ISD::STORE:
- if (SDNode *ResNode = SelectStoreFp64(Node))
- return ResNode;
- // Other cases are autogenerated.
- break;
-
case MipsISD::ThreadPointer: {
unsigned SrcReg = Mips::HWR29;
unsigned DestReg = Mips::V1;
diff --git a/contrib/llvm/lib/Target/Mips/MipsISelLowering.cpp b/contrib/llvm/lib/Target/Mips/MipsISelLowering.cpp
index b4f4b1b..1932e74 100644
--- a/contrib/llvm/lib/Target/Mips/MipsISelLowering.cpp
+++ b/contrib/llvm/lib/Target/Mips/MipsISelLowering.cpp
@@ -35,6 +35,18 @@
#include "llvm/Support/ErrorHandling.h"
using namespace llvm;
+// If I is a shifted mask, set the size (Size) and the first bit of the
+// mask (Pos), and return true.
+// For example, if I is 0x003ff800, (Pos, Size) = (11, 11).
+static bool IsShiftedMask(uint64_t I, uint64_t &Pos, uint64_t &Size) {
+ if (!isUInt<32>(I) || !isShiftedMask_32(I))
+ return false;
+
+ Size = CountPopulation_32(I);
+ Pos = CountTrailingZeros_32(I);
+ return true;
+}
+
const char *MipsTargetLowering::getTargetNodeName(unsigned Opcode) const {
switch (Opcode) {
case MipsISD::JmpLink: return "MipsISD::JmpLink";
@@ -61,27 +73,38 @@ const char *MipsTargetLowering::getTargetNodeName(unsigned Opcode) const {
case MipsISD::ExtractElementF64: return "MipsISD::ExtractElementF64";
case MipsISD::WrapperPIC: return "MipsISD::WrapperPIC";
case MipsISD::DynAlloc: return "MipsISD::DynAlloc";
+ case MipsISD::Sync: return "MipsISD::Sync";
+ case MipsISD::Ext: return "MipsISD::Ext";
+ case MipsISD::Ins: return "MipsISD::Ins";
default: return NULL;
}
}
MipsTargetLowering::
MipsTargetLowering(MipsTargetMachine &TM)
- : TargetLowering(TM, new MipsTargetObjectFile()) {
- Subtarget = &TM.getSubtarget<MipsSubtarget>();
+ : TargetLowering(TM, new MipsTargetObjectFile()),
+ Subtarget(&TM.getSubtarget<MipsSubtarget>()),
+ HasMips64(Subtarget->hasMips64()), IsN64(Subtarget->isABI_N64()) {
// Mips does not have i1 type, so use i32 for
// setcc operations results (slt, sgt, ...).
setBooleanContents(ZeroOrOneBooleanContent);
+ setBooleanVectorContents(ZeroOrOneBooleanContent); // FIXME: Is this correct?
// Set up the register classes
addRegisterClass(MVT::i32, Mips::CPURegsRegisterClass);
addRegisterClass(MVT::f32, Mips::FGR32RegisterClass);
+ if (HasMips64)
+ addRegisterClass(MVT::i64, Mips::CPU64RegsRegisterClass);
+
// When dealing with single precision only, use libcalls
- if (!Subtarget->isSingleFloat())
- if (!Subtarget->isFP64bit())
+ if (!Subtarget->isSingleFloat()) {
+ if (HasMips64)
+ addRegisterClass(MVT::f64, Mips::FGR64RegisterClass);
+ else
addRegisterClass(MVT::f64, Mips::AFGR64RegisterClass);
+ }
// Load extented operations for i1 types must be promoted
setLoadExtAction(ISD::EXTLOAD, MVT::i1, Promote);
@@ -100,6 +123,7 @@ MipsTargetLowering(MipsTargetMachine &TM)
// Mips Custom Operations
setOperationAction(ISD::GlobalAddress, MVT::i32, Custom);
+ setOperationAction(ISD::GlobalAddress, MVT::i64, Custom);
setOperationAction(ISD::BlockAddress, MVT::i32, Custom);
setOperationAction(ISD::GlobalTLSAddress, MVT::i32, Custom);
setOperationAction(ISD::JumpTable, MVT::i32, Custom);
@@ -115,6 +139,10 @@ MipsTargetLowering(MipsTargetMachine &TM)
setOperationAction(ISD::SREM, MVT::i32, Expand);
setOperationAction(ISD::UDIV, MVT::i32, Expand);
setOperationAction(ISD::UREM, MVT::i32, Expand);
+ setOperationAction(ISD::SDIV, MVT::i64, Expand);
+ setOperationAction(ISD::SREM, MVT::i64, Expand);
+ setOperationAction(ISD::UDIV, MVT::i64, Expand);
+ setOperationAction(ISD::UREM, MVT::i64, Expand);
// Operations not directly supported by Mips.
setOperationAction(ISD::BR_JT, MVT::Other, Expand);
@@ -126,10 +154,14 @@ MipsTargetLowering(MipsTargetMachine &TM)
setOperationAction(ISD::CTPOP, MVT::i32, Expand);
setOperationAction(ISD::CTTZ, MVT::i32, Expand);
setOperationAction(ISD::ROTL, MVT::i32, Expand);
+ setOperationAction(ISD::ROTL, MVT::i64, Expand);
- if (!Subtarget->isMips32r2())
+ if (!Subtarget->hasMips32r2())
setOperationAction(ISD::ROTR, MVT::i32, Expand);
+ if (!Subtarget->hasMips64r2())
+ setOperationAction(ISD::ROTR, MVT::i64, Expand);
+
setOperationAction(ISD::SHL_PARTS, MVT::i32, Expand);
setOperationAction(ISD::SRA_PARTS, MVT::i32, Expand);
setOperationAction(ISD::SRL_PARTS, MVT::i32, Expand);
@@ -159,7 +191,14 @@ MipsTargetLowering(MipsTargetMachine &TM)
// Use the default for now
setOperationAction(ISD::STACKSAVE, MVT::Other, Expand);
setOperationAction(ISD::STACKRESTORE, MVT::Other, Expand);
- setOperationAction(ISD::MEMBARRIER, MVT::Other, Expand);
+
+ setOperationAction(ISD::MEMBARRIER, MVT::Other, Custom);
+ setOperationAction(ISD::ATOMIC_FENCE, MVT::Other, Custom);
+
+ setOperationAction(ISD::ATOMIC_LOAD, MVT::i32, Expand);
+ setOperationAction(ISD::ATOMIC_STORE, MVT::i32, Expand);
+
+ setInsertFencesForAtomic(true);
if (Subtarget->isSingleFloat())
setOperationAction(ISD::SELECT_CC, MVT::f64, Expand);
@@ -180,6 +219,8 @@ MipsTargetLowering(MipsTargetMachine &TM)
setTargetDAGCombine(ISD::SDIVREM);
setTargetDAGCombine(ISD::UDIVREM);
setTargetDAGCombine(ISD::SETCC);
+ setTargetDAGCombine(ISD::AND);
+ setTargetDAGCombine(ISD::OR);
setMinFunctionAlignment(2);
@@ -190,7 +231,12 @@ MipsTargetLowering(MipsTargetMachine &TM)
setExceptionSelectorRegister(Mips::A1);
}
-MVT::SimpleValueType MipsTargetLowering::getSetCCResultType(EVT VT) const {
+bool MipsTargetLowering::allowsUnalignedMemoryAccesses(EVT VT) const {
+ MVT::SimpleValueType SVT = VT.getSimpleVT().SimpleTy;
+ return SVT == MVT::i64 || SVT == MVT::i32 || SVT == MVT::i16;
+}
+
+EVT MipsTargetLowering::getSetCCResultType(EVT VT) const {
return MVT::i32;
}
@@ -348,7 +394,7 @@ static SDValue PerformADDECombine(SDNode *N, SelectionDAG& DAG,
if (DCI.isBeforeLegalize())
return SDValue();
- if (Subtarget->isMips32() && SelectMadd(N, &DAG))
+ if (Subtarget->hasMips32() && SelectMadd(N, &DAG))
return SDValue(N, 0);
return SDValue();
@@ -360,7 +406,7 @@ static SDValue PerformSUBECombine(SDNode *N, SelectionDAG& DAG,
if (DCI.isBeforeLegalize())
return SDValue();
- if (Subtarget->isMips32() && SelectMsub(N, &DAG))
+ if (Subtarget->hasMips32() && SelectMsub(N, &DAG))
return SDValue(N, 0);
return SDValue();
@@ -372,6 +418,9 @@ static SDValue PerformDivRemCombine(SDNode *N, SelectionDAG& DAG,
if (DCI.isBeforeLegalizeOps())
return SDValue();
+ EVT Ty = N->getValueType(0);
+ unsigned LO = (Ty == MVT::i32) ? Mips::LO : Mips::LO64;
+ unsigned HI = (Ty == MVT::i32) ? Mips::HI : Mips::HI64;
unsigned opc = N->getOpcode() == ISD::SDIVREM ? MipsISD::DivRem :
MipsISD::DivRemU;
DebugLoc dl = N->getDebugLoc();
@@ -383,7 +432,7 @@ static SDValue PerformDivRemCombine(SDNode *N, SelectionDAG& DAG,
// insert MFLO
if (N->hasAnyUseOfValue(0)) {
- SDValue CopyFromLo = DAG.getCopyFromReg(InChain, dl, Mips::LO, MVT::i32,
+ SDValue CopyFromLo = DAG.getCopyFromReg(InChain, dl, LO, Ty,
InGlue);
DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), CopyFromLo);
InChain = CopyFromLo.getValue(1);
@@ -393,7 +442,7 @@ static SDValue PerformDivRemCombine(SDNode *N, SelectionDAG& DAG,
// insert MFHI
if (N->hasAnyUseOfValue(1)) {
SDValue CopyFromHi = DAG.getCopyFromReg(InChain, dl,
- Mips::HI, MVT::i32, InGlue);
+ HI, Ty, InGlue);
DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), CopyFromHi);
}
@@ -490,6 +539,101 @@ static SDValue PerformSETCCCombine(SDNode *N, SelectionDAG& DAG,
return CreateCMovFP(DAG, Cond, True, False, N->getDebugLoc());
}
+static SDValue PerformANDCombine(SDNode *N, SelectionDAG& DAG,
+ TargetLowering::DAGCombinerInfo &DCI,
+ const MipsSubtarget* Subtarget) {
+ // Pattern match EXT.
+ // $dst = and ((sra or srl) $src , pos), (2**size - 1)
+ // => ext $dst, $src, size, pos
+ if (DCI.isBeforeLegalizeOps() || !Subtarget->hasMips32r2())
+ return SDValue();
+
+ SDValue ShiftRight = N->getOperand(0), Mask = N->getOperand(1);
+
+ // Op's first operand must be a shift right.
+ if (ShiftRight.getOpcode() != ISD::SRA && ShiftRight.getOpcode() != ISD::SRL)
+ return SDValue();
+
+ // The second operand of the shift must be an immediate.
+ uint64_t Pos;
+ ConstantSDNode *CN;
+ if (!(CN = dyn_cast<ConstantSDNode>(ShiftRight.getOperand(1))))
+ return SDValue();
+
+ Pos = CN->getZExtValue();
+
+ uint64_t SMPos, SMSize;
+ // Op's second operand must be a shifted mask.
+ if (!(CN = dyn_cast<ConstantSDNode>(Mask)) ||
+ !IsShiftedMask(CN->getZExtValue(), SMPos, SMSize))
+ return SDValue();
+
+ // Return if the shifted mask does not start at bit 0 or the sum of its size
+ // and Pos exceeds the word's size.
+ if (SMPos != 0 || Pos + SMSize > 32)
+ return SDValue();
+
+ return DAG.getNode(MipsISD::Ext, N->getDebugLoc(), MVT::i32,
+ ShiftRight.getOperand(0),
+ DAG.getConstant(Pos, MVT::i32),
+ DAG.getConstant(SMSize, MVT::i32));
+}
+
+static SDValue PerformORCombine(SDNode *N, SelectionDAG& DAG,
+ TargetLowering::DAGCombinerInfo &DCI,
+ const MipsSubtarget* Subtarget) {
+ // Pattern match INS.
+ // $dst = or (and $src1 , mask0), (and (shl $src, pos), mask1),
+ // where mask1 = (2**size - 1) << pos, mask0 = ~mask1
+ // => ins $dst, $src, size, pos, $src1
+ if (DCI.isBeforeLegalizeOps() || !Subtarget->hasMips32r2())
+ return SDValue();
+
+ SDValue And0 = N->getOperand(0), And1 = N->getOperand(1);
+ uint64_t SMPos0, SMSize0, SMPos1, SMSize1;
+ ConstantSDNode *CN;
+
+ // See if Op's first operand matches (and $src1 , mask0).
+ if (And0.getOpcode() != ISD::AND)
+ return SDValue();
+
+ if (!(CN = dyn_cast<ConstantSDNode>(And0.getOperand(1))) ||
+ !IsShiftedMask(~CN->getSExtValue(), SMPos0, SMSize0))
+ return SDValue();
+
+ // See if Op's second operand matches (and (shl $src, pos), mask1).
+ if (And1.getOpcode() != ISD::AND)
+ return SDValue();
+
+ if (!(CN = dyn_cast<ConstantSDNode>(And1.getOperand(1))) ||
+ !IsShiftedMask(CN->getZExtValue(), SMPos1, SMSize1))
+ return SDValue();
+
+ // The shift masks must have the same position and size.
+ if (SMPos0 != SMPos1 || SMSize0 != SMSize1)
+ return SDValue();
+
+ SDValue Shl = And1.getOperand(0);
+ if (Shl.getOpcode() != ISD::SHL)
+ return SDValue();
+
+ if (!(CN = dyn_cast<ConstantSDNode>(Shl.getOperand(1))))
+ return SDValue();
+
+ unsigned Shamt = CN->getZExtValue();
+
+ // Return if the shift amount and the first bit position of mask are not the
+ // same.
+ if (Shamt != SMPos0)
+ return SDValue();
+
+ return DAG.getNode(MipsISD::Ins, N->getDebugLoc(), MVT::i32,
+ Shl.getOperand(0),
+ DAG.getConstant(SMPos0, MVT::i32),
+ DAG.getConstant(SMSize0, MVT::i32),
+ And0.getOperand(0));
+}
+
SDValue MipsTargetLowering::PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI)
const {
SelectionDAG &DAG = DCI.DAG;
@@ -506,6 +650,10 @@ SDValue MipsTargetLowering::PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI)
return PerformDivRemCombine(N, DAG, DCI, Subtarget);
case ISD::SETCC:
return PerformSETCCCombine(N, DAG, DCI, Subtarget);
+ case ISD::AND:
+ return PerformANDCombine(N, DAG, DCI, Subtarget);
+ case ISD::OR:
+ return PerformORCombine(N, DAG, DCI, Subtarget);
}
return SDValue();
@@ -527,6 +675,8 @@ LowerOperation(SDValue Op, SelectionDAG &DAG) const
case ISD::VASTART: return LowerVASTART(Op, DAG);
case ISD::FCOPYSIGN: return LowerFCOPYSIGN(Op, DAG);
case ISD::FRAMEADDR: return LowerFRAMEADDR(Op, DAG);
+ case ISD::MEMBARRIER: return LowerMEMBARRIER(Op, DAG);
+ case ISD::ATOMIC_FENCE: return LowerATOMIC_FENCE(Op, DAG);
}
return SDValue();
}
@@ -733,13 +883,13 @@ MipsTargetLowering::EmitAtomicBinary(MachineInstr *MI, MachineBasicBlock *BB,
const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
DebugLoc dl = MI->getDebugLoc();
- unsigned Dest = MI->getOperand(0).getReg();
+ unsigned OldVal = MI->getOperand(0).getReg();
unsigned Ptr = MI->getOperand(1).getReg();
unsigned Incr = MI->getOperand(2).getReg();
- unsigned Oldval = RegInfo.createVirtualRegister(RC);
- unsigned Tmp1 = RegInfo.createVirtualRegister(RC);
- unsigned Tmp2 = RegInfo.createVirtualRegister(RC);
+ unsigned StoreVal = RegInfo.createVirtualRegister(RC);
+ unsigned AndRes = RegInfo.createVirtualRegister(RC);
+ unsigned Success = RegInfo.createVirtualRegister(RC);
// insert new blocks after the current block
const BasicBlock *LLVM_BB = BB->getBasicBlock();
@@ -758,61 +908,38 @@ MipsTargetLowering::EmitAtomicBinary(MachineInstr *MI, MachineBasicBlock *BB,
// thisMBB:
// ...
- // sw incr, fi(sp) // store incr to stack (when BinOpcode == 0)
// fallthrough --> loopMBB
-
- // Note: for atomic.swap (when BinOpcode == 0), storing incr to stack before
- // the loop and then loading it from stack in block loopMBB is necessary to
- // prevent MachineLICM pass to hoist "or" instruction out of the block
- // loopMBB.
-
- int fi = 0;
- if (BinOpcode == 0 && !Nand) {
- // Get or create a temporary stack location.
- MipsFunctionInfo *MipsFI = MF->getInfo<MipsFunctionInfo>();
- fi = MipsFI->getAtomicFrameIndex();
- if (fi == -1) {
- fi = MF->getFrameInfo()->CreateStackObject(Size, Size, false);
- MipsFI->setAtomicFrameIndex(fi);
- }
-
- BuildMI(BB, dl, TII->get(Mips::SW))
- .addReg(Incr).addFrameIndex(fi).addImm(0);
- }
BB->addSuccessor(loopMBB);
+ loopMBB->addSuccessor(loopMBB);
+ loopMBB->addSuccessor(exitMBB);
// loopMBB:
// ll oldval, 0(ptr)
- // or dest, $0, oldval
- // <binop> tmp1, oldval, incr
- // sc tmp1, 0(ptr)
- // beq tmp1, $0, loopMBB
+ // <binop> storeval, oldval, incr
+ // sc success, storeval, 0(ptr)
+ // beq success, $0, loopMBB
BB = loopMBB;
- BuildMI(BB, dl, TII->get(Mips::LL), Oldval).addReg(Ptr).addImm(0);
- BuildMI(BB, dl, TII->get(Mips::OR), Dest).addReg(Mips::ZERO).addReg(Oldval);
+ BuildMI(BB, dl, TII->get(Mips::LL), OldVal).addReg(Ptr).addImm(0);
if (Nand) {
- // and tmp2, oldval, incr
- // nor tmp1, $0, tmp2
- BuildMI(BB, dl, TII->get(Mips::AND), Tmp2).addReg(Oldval).addReg(Incr);
- BuildMI(BB, dl, TII->get(Mips::NOR), Tmp1).addReg(Mips::ZERO).addReg(Tmp2);
+ // and andres, oldval, incr
+ // nor storeval, $0, andres
+ BuildMI(BB, dl, TII->get(Mips::AND), AndRes).addReg(OldVal).addReg(Incr);
+ BuildMI(BB, dl, TII->get(Mips::NOR), StoreVal)
+ .addReg(Mips::ZERO).addReg(AndRes);
} else if (BinOpcode) {
- // <binop> tmp1, oldval, incr
- BuildMI(BB, dl, TII->get(BinOpcode), Tmp1).addReg(Oldval).addReg(Incr);
+ // <binop> storeval, oldval, incr
+ BuildMI(BB, dl, TII->get(BinOpcode), StoreVal).addReg(OldVal).addReg(Incr);
} else {
- // lw tmp2, fi(sp) // load incr from stack
- // or tmp1, $zero, tmp2
- BuildMI(BB, dl, TII->get(Mips::LW), Tmp2).addFrameIndex(fi).addImm(0);
- BuildMI(BB, dl, TII->get(Mips::OR), Tmp1).addReg(Mips::ZERO).addReg(Tmp2);
+ StoreVal = Incr;
}
- BuildMI(BB, dl, TII->get(Mips::SC), Tmp1).addReg(Tmp1).addReg(Ptr).addImm(0);
+ BuildMI(BB, dl, TII->get(Mips::SC), Success)
+ .addReg(StoreVal).addReg(Ptr).addImm(0);
BuildMI(BB, dl, TII->get(Mips::BEQ))
- .addReg(Tmp1).addReg(Mips::ZERO).addMBB(loopMBB);
- BB->addSuccessor(loopMBB);
- BB->addSuccessor(exitMBB);
+ .addReg(Success).addReg(Mips::ZERO).addMBB(loopMBB);
MI->eraseFromParent(); // The instruction is gone now.
- return BB;
+ return exitMBB;
}
MachineBasicBlock *
@@ -833,33 +960,34 @@ MipsTargetLowering::EmitAtomicBinaryPartword(MachineInstr *MI,
unsigned Ptr = MI->getOperand(1).getReg();
unsigned Incr = MI->getOperand(2).getReg();
- unsigned Addr = RegInfo.createVirtualRegister(RC);
- unsigned Shift = RegInfo.createVirtualRegister(RC);
+ unsigned AlignedAddr = RegInfo.createVirtualRegister(RC);
+ unsigned ShiftAmt = RegInfo.createVirtualRegister(RC);
unsigned Mask = RegInfo.createVirtualRegister(RC);
unsigned Mask2 = RegInfo.createVirtualRegister(RC);
- unsigned Newval = RegInfo.createVirtualRegister(RC);
- unsigned Oldval = RegInfo.createVirtualRegister(RC);
+ unsigned NewVal = RegInfo.createVirtualRegister(RC);
+ unsigned OldVal = RegInfo.createVirtualRegister(RC);
unsigned Incr2 = RegInfo.createVirtualRegister(RC);
- unsigned Tmp1 = RegInfo.createVirtualRegister(RC);
- unsigned Tmp2 = RegInfo.createVirtualRegister(RC);
- unsigned Tmp3 = RegInfo.createVirtualRegister(RC);
- unsigned Tmp4 = RegInfo.createVirtualRegister(RC);
- unsigned Tmp5 = RegInfo.createVirtualRegister(RC);
- unsigned Tmp6 = RegInfo.createVirtualRegister(RC);
- unsigned Tmp7 = RegInfo.createVirtualRegister(RC);
- unsigned Tmp8 = RegInfo.createVirtualRegister(RC);
- unsigned Tmp9 = RegInfo.createVirtualRegister(RC);
- unsigned Tmp10 = RegInfo.createVirtualRegister(RC);
- unsigned Tmp11 = RegInfo.createVirtualRegister(RC);
- unsigned Tmp12 = RegInfo.createVirtualRegister(RC);
+ unsigned MaskLSB2 = RegInfo.createVirtualRegister(RC);
+ unsigned PtrLSB2 = RegInfo.createVirtualRegister(RC);
+ unsigned MaskUpper = RegInfo.createVirtualRegister(RC);
+ unsigned AndRes = RegInfo.createVirtualRegister(RC);
+ unsigned BinOpRes = RegInfo.createVirtualRegister(RC);
+ unsigned MaskedOldVal0 = RegInfo.createVirtualRegister(RC);
+ unsigned StoreVal = RegInfo.createVirtualRegister(RC);
+ unsigned MaskedOldVal1 = RegInfo.createVirtualRegister(RC);
+ unsigned SrlRes = RegInfo.createVirtualRegister(RC);
+ unsigned SllRes = RegInfo.createVirtualRegister(RC);
+ unsigned Success = RegInfo.createVirtualRegister(RC);
// insert new blocks after the current block
const BasicBlock *LLVM_BB = BB->getBasicBlock();
MachineBasicBlock *loopMBB = MF->CreateMachineBasicBlock(LLVM_BB);
+ MachineBasicBlock *sinkMBB = MF->CreateMachineBasicBlock(LLVM_BB);
MachineBasicBlock *exitMBB = MF->CreateMachineBasicBlock(LLVM_BB);
MachineFunction::iterator It = BB;
++It;
MF->insert(It, loopMBB);
+ MF->insert(It, sinkMBB);
MF->insert(It, exitMBB);
// Transfer the remainder of BB and its successor edges to exitMBB.
@@ -868,111 +996,104 @@ MipsTargetLowering::EmitAtomicBinaryPartword(MachineInstr *MI,
BB->end());
exitMBB->transferSuccessorsAndUpdatePHIs(BB);
+ BB->addSuccessor(loopMBB);
+ loopMBB->addSuccessor(loopMBB);
+ loopMBB->addSuccessor(sinkMBB);
+ sinkMBB->addSuccessor(exitMBB);
+
// thisMBB:
- // addiu tmp1,$0,-4 # 0xfffffffc
- // and addr,ptr,tmp1
- // andi tmp2,ptr,3
- // sll shift,tmp2,3
- // ori tmp3,$0,255 # 0xff
- // sll mask,tmp3,shift
+ // addiu masklsb2,$0,-4 # 0xfffffffc
+ // and alignedaddr,ptr,masklsb2
+ // andi ptrlsb2,ptr,3
+ // sll shiftamt,ptrlsb2,3
+ // ori maskupper,$0,255 # 0xff
+ // sll mask,maskupper,shiftamt
// nor mask2,$0,mask
- // andi tmp4,incr,255
- // sll incr2,tmp4,shift
- // sw incr2, fi(sp) // store incr2 to stack (when BinOpcode == 0)
-
- // Note: for atomic.swap (when BinOpcode == 0), storing incr2 to stack before
- // the loop and then loading it from stack in block loopMBB is necessary to
- // prevent MachineLICM pass to hoist "or" instruction out of the block
- // loopMBB.
+ // sll incr2,incr,shiftamt
int64_t MaskImm = (Size == 1) ? 255 : 65535;
- BuildMI(BB, dl, TII->get(Mips::ADDiu), Tmp1).addReg(Mips::ZERO).addImm(-4);
- BuildMI(BB, dl, TII->get(Mips::AND), Addr).addReg(Ptr).addReg(Tmp1);
- BuildMI(BB, dl, TII->get(Mips::ANDi), Tmp2).addReg(Ptr).addImm(3);
- BuildMI(BB, dl, TII->get(Mips::SLL), Shift).addReg(Tmp2).addImm(3);
- BuildMI(BB, dl, TII->get(Mips::ORi), Tmp3).addReg(Mips::ZERO).addImm(MaskImm);
- BuildMI(BB, dl, TII->get(Mips::SLL), Mask).addReg(Tmp3).addReg(Shift);
+ BuildMI(BB, dl, TII->get(Mips::ADDiu), MaskLSB2)
+ .addReg(Mips::ZERO).addImm(-4);
+ BuildMI(BB, dl, TII->get(Mips::AND), AlignedAddr)
+ .addReg(Ptr).addReg(MaskLSB2);
+ BuildMI(BB, dl, TII->get(Mips::ANDi), PtrLSB2).addReg(Ptr).addImm(3);
+ BuildMI(BB, dl, TII->get(Mips::SLL), ShiftAmt).addReg(PtrLSB2).addImm(3);
+ BuildMI(BB, dl, TII->get(Mips::ORi), MaskUpper)
+ .addReg(Mips::ZERO).addImm(MaskImm);
+ BuildMI(BB, dl, TII->get(Mips::SLLV), Mask)
+ .addReg(ShiftAmt).addReg(MaskUpper);
BuildMI(BB, dl, TII->get(Mips::NOR), Mask2).addReg(Mips::ZERO).addReg(Mask);
- if (BinOpcode != Mips::SUBu) {
- BuildMI(BB, dl, TII->get(Mips::ANDi), Tmp4).addReg(Incr).addImm(MaskImm);
- BuildMI(BB, dl, TII->get(Mips::SLL), Incr2).addReg(Tmp4).addReg(Shift);
- } else {
- BuildMI(BB, dl, TII->get(Mips::SUBu), Tmp4).addReg(Mips::ZERO).addReg(Incr);
- BuildMI(BB, dl, TII->get(Mips::ANDi), Tmp5).addReg(Tmp4).addImm(MaskImm);
- BuildMI(BB, dl, TII->get(Mips::SLL), Incr2).addReg(Tmp5).addReg(Shift);
- }
+ BuildMI(BB, dl, TII->get(Mips::SLLV), Incr2).addReg(ShiftAmt).addReg(Incr);
- int fi = 0;
- if (BinOpcode == 0 && !Nand) {
- // Get or create a temporary stack location.
- MipsFunctionInfo *MipsFI = MF->getInfo<MipsFunctionInfo>();
- fi = MipsFI->getAtomicFrameIndex();
- if (fi == -1) {
- fi = MF->getFrameInfo()->CreateStackObject(Size, Size, false);
- MipsFI->setAtomicFrameIndex(fi);
- }
-
- BuildMI(BB, dl, TII->get(Mips::SW))
- .addReg(Incr2).addFrameIndex(fi).addImm(0);
- }
- BB->addSuccessor(loopMBB);
+ // atomic.load.binop
+ // loopMBB:
+ // ll oldval,0(alignedaddr)
+ // binop binopres,oldval,incr2
+ // and newval,binopres,mask
+ // and maskedoldval0,oldval,mask2
+ // or storeval,maskedoldval0,newval
+ // sc success,storeval,0(alignedaddr)
+ // beq success,$0,loopMBB
+
+ // atomic.swap
// loopMBB:
- // ll oldval,0(addr)
- // binop tmp7,oldval,incr2
- // and newval,tmp7,mask
- // and tmp8,oldval,mask2
- // or tmp9,tmp8,newval
- // sc tmp9,0(addr)
- // beq tmp9,$0,loopMBB
+ // ll oldval,0(alignedaddr)
+ // and newval,incr2,mask
+ // and maskedoldval0,oldval,mask2
+ // or storeval,maskedoldval0,newval
+ // sc success,storeval,0(alignedaddr)
+ // beq success,$0,loopMBB
+
BB = loopMBB;
- BuildMI(BB, dl, TII->get(Mips::LL), Oldval).addReg(Addr).addImm(0);
+ BuildMI(BB, dl, TII->get(Mips::LL), OldVal).addReg(AlignedAddr).addImm(0);
if (Nand) {
- // and tmp6, oldval, incr2
- // nor tmp7, $0, tmp6
- BuildMI(BB, dl, TII->get(Mips::AND), Tmp6).addReg(Oldval).addReg(Incr2);
- BuildMI(BB, dl, TII->get(Mips::NOR), Tmp7).addReg(Mips::ZERO).addReg(Tmp6);
- } else if (BinOpcode == Mips::SUBu) {
- // addu tmp7, oldval, incr2
- BuildMI(BB, dl, TII->get(Mips::ADDu), Tmp7).addReg(Oldval).addReg(Incr2);
+ // and andres, oldval, incr2
+ // nor binopres, $0, andres
+ // and newval, binopres, mask
+ BuildMI(BB, dl, TII->get(Mips::AND), AndRes).addReg(OldVal).addReg(Incr2);
+ BuildMI(BB, dl, TII->get(Mips::NOR), BinOpRes)
+ .addReg(Mips::ZERO).addReg(AndRes);
+ BuildMI(BB, dl, TII->get(Mips::AND), NewVal).addReg(BinOpRes).addReg(Mask);
} else if (BinOpcode) {
- // <binop> tmp7, oldval, incr2
- BuildMI(BB, dl, TII->get(BinOpcode), Tmp7).addReg(Oldval).addReg(Incr2);
- } else {
- // lw tmp6, fi(sp) // load incr2 from stack
- // or tmp7, $zero, tmp6
- BuildMI(BB, dl, TII->get(Mips::LW), Tmp6).addFrameIndex(fi).addImm(0);
- BuildMI(BB, dl, TII->get(Mips::OR), Tmp7).addReg(Mips::ZERO).addReg(Tmp6);
+ // <binop> binopres, oldval, incr2
+ // and newval, binopres, mask
+ BuildMI(BB, dl, TII->get(BinOpcode), BinOpRes).addReg(OldVal).addReg(Incr2);
+ BuildMI(BB, dl, TII->get(Mips::AND), NewVal).addReg(BinOpRes).addReg(Mask);
+ } else {// atomic.swap
+ // and newval, incr2, mask
+ BuildMI(BB, dl, TII->get(Mips::AND), NewVal).addReg(Incr2).addReg(Mask);
}
- BuildMI(BB, dl, TII->get(Mips::AND), Newval).addReg(Tmp7).addReg(Mask);
- BuildMI(BB, dl, TII->get(Mips::AND), Tmp8).addReg(Oldval).addReg(Mask2);
- BuildMI(BB, dl, TII->get(Mips::OR), Tmp9).addReg(Tmp8).addReg(Newval);
- BuildMI(BB, dl, TII->get(Mips::SC), Tmp9).addReg(Tmp9).addReg(Addr).addImm(0);
+
+ BuildMI(BB, dl, TII->get(Mips::AND), MaskedOldVal0)
+ .addReg(OldVal).addReg(Mask2);
+ BuildMI(BB, dl, TII->get(Mips::OR), StoreVal)
+ .addReg(MaskedOldVal0).addReg(NewVal);
+ BuildMI(BB, dl, TII->get(Mips::SC), Success)
+ .addReg(StoreVal).addReg(AlignedAddr).addImm(0);
BuildMI(BB, dl, TII->get(Mips::BEQ))
- .addReg(Tmp9).addReg(Mips::ZERO).addMBB(loopMBB);
- BB->addSuccessor(loopMBB);
- BB->addSuccessor(exitMBB);
-
- // exitMBB:
- // and tmp10,oldval,mask
- // srl tmp11,tmp10,shift
- // sll tmp12,tmp11,24
- // sra dest,tmp12,24
- BB = exitMBB;
+ .addReg(Success).addReg(Mips::ZERO).addMBB(loopMBB);
+
+ // sinkMBB:
+ // and maskedoldval1,oldval,mask
+ // srl srlres,maskedoldval1,shiftamt
+ // sll sllres,srlres,24
+ // sra dest,sllres,24
+ BB = sinkMBB;
int64_t ShiftImm = (Size == 1) ? 24 : 16;
- // reverse order
- BuildMI(*BB, BB->begin(), dl, TII->get(Mips::SRA), Dest)
- .addReg(Tmp12).addImm(ShiftImm);
- BuildMI(*BB, BB->begin(), dl, TII->get(Mips::SLL), Tmp12)
- .addReg(Tmp11).addImm(ShiftImm);
- BuildMI(*BB, BB->begin(), dl, TII->get(Mips::SRL), Tmp11)
- .addReg(Tmp10).addReg(Shift);
- BuildMI(*BB, BB->begin(), dl, TII->get(Mips::AND), Tmp10)
- .addReg(Oldval).addReg(Mask);
+
+ BuildMI(BB, dl, TII->get(Mips::AND), MaskedOldVal1)
+ .addReg(OldVal).addReg(Mask);
+ BuildMI(BB, dl, TII->get(Mips::SRLV), SrlRes)
+ .addReg(ShiftAmt).addReg(MaskedOldVal1);
+ BuildMI(BB, dl, TII->get(Mips::SLL), SllRes)
+ .addReg(SrlRes).addImm(ShiftImm);
+ BuildMI(BB, dl, TII->get(Mips::SRA), Dest)
+ .addReg(SllRes).addImm(ShiftImm);
MI->eraseFromParent(); // The instruction is gone now.
- return BB;
+ return exitMBB;
}
MachineBasicBlock *
@@ -989,11 +1110,10 @@ MipsTargetLowering::EmitAtomicCmpSwap(MachineInstr *MI,
unsigned Dest = MI->getOperand(0).getReg();
unsigned Ptr = MI->getOperand(1).getReg();
- unsigned Oldval = MI->getOperand(2).getReg();
- unsigned Newval = MI->getOperand(3).getReg();
+ unsigned OldVal = MI->getOperand(2).getReg();
+ unsigned NewVal = MI->getOperand(3).getReg();
- unsigned Tmp1 = RegInfo.createVirtualRegister(RC);
- unsigned Tmp2 = RegInfo.createVirtualRegister(RC);
+ unsigned Success = RegInfo.createVirtualRegister(RC);
// insert new blocks after the current block
const BasicBlock *LLVM_BB = BB->getBasicBlock();
@@ -1012,26 +1132,14 @@ MipsTargetLowering::EmitAtomicCmpSwap(MachineInstr *MI,
BB->end());
exitMBB->transferSuccessorsAndUpdatePHIs(BB);
- // Get or create a temporary stack location.
- MipsFunctionInfo *MipsFI = MF->getInfo<MipsFunctionInfo>();
- int fi = MipsFI->getAtomicFrameIndex();
- if (fi == -1) {
- fi = MF->getFrameInfo()->CreateStackObject(Size, Size, false);
- MipsFI->setAtomicFrameIndex(fi);
- }
-
// thisMBB:
// ...
- // sw newval, fi(sp) // store newval to stack
// fallthrough --> loop1MBB
-
- // Note: storing newval to stack before the loop and then loading it from
- // stack in block loop2MBB is necessary to prevent MachineLICM pass to
- // hoist "or" instruction out of the block loop2MBB.
-
- BuildMI(BB, dl, TII->get(Mips::SW))
- .addReg(Newval).addFrameIndex(fi).addImm(0);
BB->addSuccessor(loop1MBB);
+ loop1MBB->addSuccessor(exitMBB);
+ loop1MBB->addSuccessor(loop2MBB);
+ loop2MBB->addSuccessor(loop1MBB);
+ loop2MBB->addSuccessor(exitMBB);
// loop1MBB:
// ll dest, 0(ptr)
@@ -1039,27 +1147,20 @@ MipsTargetLowering::EmitAtomicCmpSwap(MachineInstr *MI,
BB = loop1MBB;
BuildMI(BB, dl, TII->get(Mips::LL), Dest).addReg(Ptr).addImm(0);
BuildMI(BB, dl, TII->get(Mips::BNE))
- .addReg(Dest).addReg(Oldval).addMBB(exitMBB);
- BB->addSuccessor(exitMBB);
- BB->addSuccessor(loop2MBB);
+ .addReg(Dest).addReg(OldVal).addMBB(exitMBB);
// loop2MBB:
- // lw tmp2, fi(sp) // load newval from stack
- // or tmp1, $0, tmp2
- // sc tmp1, 0(ptr)
- // beq tmp1, $0, loop1MBB
+ // sc success, newval, 0(ptr)
+ // beq success, $0, loop1MBB
BB = loop2MBB;
- BuildMI(BB, dl, TII->get(Mips::LW), Tmp2).addFrameIndex(fi).addImm(0);
- BuildMI(BB, dl, TII->get(Mips::OR), Tmp1).addReg(Mips::ZERO).addReg(Tmp2);
- BuildMI(BB, dl, TII->get(Mips::SC), Tmp1).addReg(Tmp1).addReg(Ptr).addImm(0);
+ BuildMI(BB, dl, TII->get(Mips::SC), Success)
+ .addReg(NewVal).addReg(Ptr).addImm(0);
BuildMI(BB, dl, TII->get(Mips::BEQ))
- .addReg(Tmp1).addReg(Mips::ZERO).addMBB(loop1MBB);
- BB->addSuccessor(loop1MBB);
- BB->addSuccessor(exitMBB);
+ .addReg(Success).addReg(Mips::ZERO).addMBB(loop1MBB);
MI->eraseFromParent(); // The instruction is gone now.
- return BB;
+ return exitMBB;
}
MachineBasicBlock *
@@ -1077,36 +1178,39 @@ MipsTargetLowering::EmitAtomicCmpSwapPartword(MachineInstr *MI,
unsigned Dest = MI->getOperand(0).getReg();
unsigned Ptr = MI->getOperand(1).getReg();
- unsigned Oldval = MI->getOperand(2).getReg();
- unsigned Newval = MI->getOperand(3).getReg();
+ unsigned CmpVal = MI->getOperand(2).getReg();
+ unsigned NewVal = MI->getOperand(3).getReg();
- unsigned Addr = RegInfo.createVirtualRegister(RC);
- unsigned Shift = RegInfo.createVirtualRegister(RC);
+ unsigned AlignedAddr = RegInfo.createVirtualRegister(RC);
+ unsigned ShiftAmt = RegInfo.createVirtualRegister(RC);
unsigned Mask = RegInfo.createVirtualRegister(RC);
unsigned Mask2 = RegInfo.createVirtualRegister(RC);
- unsigned Oldval2 = RegInfo.createVirtualRegister(RC);
- unsigned Oldval3 = RegInfo.createVirtualRegister(RC);
- unsigned Oldval4 = RegInfo.createVirtualRegister(RC);
- unsigned Newval2 = RegInfo.createVirtualRegister(RC);
- unsigned Tmp1 = RegInfo.createVirtualRegister(RC);
- unsigned Tmp2 = RegInfo.createVirtualRegister(RC);
- unsigned Tmp3 = RegInfo.createVirtualRegister(RC);
- unsigned Tmp4 = RegInfo.createVirtualRegister(RC);
- unsigned Tmp5 = RegInfo.createVirtualRegister(RC);
- unsigned Tmp6 = RegInfo.createVirtualRegister(RC);
- unsigned Tmp7 = RegInfo.createVirtualRegister(RC);
- unsigned Tmp8 = RegInfo.createVirtualRegister(RC);
- unsigned Tmp9 = RegInfo.createVirtualRegister(RC);
+ unsigned ShiftedCmpVal = RegInfo.createVirtualRegister(RC);
+ unsigned OldVal = RegInfo.createVirtualRegister(RC);
+ unsigned MaskedOldVal0 = RegInfo.createVirtualRegister(RC);
+ unsigned ShiftedNewVal = RegInfo.createVirtualRegister(RC);
+ unsigned MaskLSB2 = RegInfo.createVirtualRegister(RC);
+ unsigned PtrLSB2 = RegInfo.createVirtualRegister(RC);
+ unsigned MaskUpper = RegInfo.createVirtualRegister(RC);
+ unsigned MaskedCmpVal = RegInfo.createVirtualRegister(RC);
+ unsigned MaskedNewVal = RegInfo.createVirtualRegister(RC);
+ unsigned MaskedOldVal1 = RegInfo.createVirtualRegister(RC);
+ unsigned StoreVal = RegInfo.createVirtualRegister(RC);
+ unsigned SrlRes = RegInfo.createVirtualRegister(RC);
+ unsigned SllRes = RegInfo.createVirtualRegister(RC);
+ unsigned Success = RegInfo.createVirtualRegister(RC);
// insert new blocks after the current block
const BasicBlock *LLVM_BB = BB->getBasicBlock();
MachineBasicBlock *loop1MBB = MF->CreateMachineBasicBlock(LLVM_BB);
MachineBasicBlock *loop2MBB = MF->CreateMachineBasicBlock(LLVM_BB);
+ MachineBasicBlock *sinkMBB = MF->CreateMachineBasicBlock(LLVM_BB);
MachineBasicBlock *exitMBB = MF->CreateMachineBasicBlock(LLVM_BB);
MachineFunction::iterator It = BB;
++It;
MF->insert(It, loop1MBB);
MF->insert(It, loop2MBB);
+ MF->insert(It, sinkMBB);
MF->insert(It, exitMBB);
// Transfer the remainder of BB and its successor edges to exitMBB.
@@ -1115,76 +1219,90 @@ MipsTargetLowering::EmitAtomicCmpSwapPartword(MachineInstr *MI,
BB->end());
exitMBB->transferSuccessorsAndUpdatePHIs(BB);
+ BB->addSuccessor(loop1MBB);
+ loop1MBB->addSuccessor(sinkMBB);
+ loop1MBB->addSuccessor(loop2MBB);
+ loop2MBB->addSuccessor(loop1MBB);
+ loop2MBB->addSuccessor(sinkMBB);
+ sinkMBB->addSuccessor(exitMBB);
+
+ // FIXME: computation of newval2 can be moved to loop2MBB.
// thisMBB:
- // addiu tmp1,$0,-4 # 0xfffffffc
- // and addr,ptr,tmp1
- // andi tmp2,ptr,3
- // sll shift,tmp2,3
- // ori tmp3,$0,255 # 0xff
- // sll mask,tmp3,shift
+ // addiu masklsb2,$0,-4 # 0xfffffffc
+ // and alignedaddr,ptr,masklsb2
+ // andi ptrlsb2,ptr,3
+ // sll shiftamt,ptrlsb2,3
+ // ori maskupper,$0,255 # 0xff
+ // sll mask,maskupper,shiftamt
// nor mask2,$0,mask
- // andi tmp4,oldval,255
- // sll oldval2,tmp4,shift
- // andi tmp5,newval,255
- // sll newval2,tmp5,shift
+ // andi maskedcmpval,cmpval,255
+ // sll shiftedcmpval,maskedcmpval,shiftamt
+ // andi maskednewval,newval,255
+ // sll shiftednewval,maskednewval,shiftamt
int64_t MaskImm = (Size == 1) ? 255 : 65535;
- BuildMI(BB, dl, TII->get(Mips::ADDiu), Tmp1).addReg(Mips::ZERO).addImm(-4);
- BuildMI(BB, dl, TII->get(Mips::AND), Addr).addReg(Ptr).addReg(Tmp1);
- BuildMI(BB, dl, TII->get(Mips::ANDi), Tmp2).addReg(Ptr).addImm(3);
- BuildMI(BB, dl, TII->get(Mips::SLL), Shift).addReg(Tmp2).addImm(3);
- BuildMI(BB, dl, TII->get(Mips::ORi), Tmp3).addReg(Mips::ZERO).addImm(MaskImm);
- BuildMI(BB, dl, TII->get(Mips::SLL), Mask).addReg(Tmp3).addReg(Shift);
+ BuildMI(BB, dl, TII->get(Mips::ADDiu), MaskLSB2)
+ .addReg(Mips::ZERO).addImm(-4);
+ BuildMI(BB, dl, TII->get(Mips::AND), AlignedAddr)
+ .addReg(Ptr).addReg(MaskLSB2);
+ BuildMI(BB, dl, TII->get(Mips::ANDi), PtrLSB2).addReg(Ptr).addImm(3);
+ BuildMI(BB, dl, TII->get(Mips::SLL), ShiftAmt).addReg(PtrLSB2).addImm(3);
+ BuildMI(BB, dl, TII->get(Mips::ORi), MaskUpper)
+ .addReg(Mips::ZERO).addImm(MaskImm);
+ BuildMI(BB, dl, TII->get(Mips::SLLV), Mask)
+ .addReg(ShiftAmt).addReg(MaskUpper);
BuildMI(BB, dl, TII->get(Mips::NOR), Mask2).addReg(Mips::ZERO).addReg(Mask);
- BuildMI(BB, dl, TII->get(Mips::ANDi), Tmp4).addReg(Oldval).addImm(MaskImm);
- BuildMI(BB, dl, TII->get(Mips::SLL), Oldval2).addReg(Tmp4).addReg(Shift);
- BuildMI(BB, dl, TII->get(Mips::ANDi), Tmp5).addReg(Newval).addImm(MaskImm);
- BuildMI(BB, dl, TII->get(Mips::SLL), Newval2).addReg(Tmp5).addReg(Shift);
- BB->addSuccessor(loop1MBB);
+ BuildMI(BB, dl, TII->get(Mips::ANDi), MaskedCmpVal)
+ .addReg(CmpVal).addImm(MaskImm);
+ BuildMI(BB, dl, TII->get(Mips::SLLV), ShiftedCmpVal)
+ .addReg(ShiftAmt).addReg(MaskedCmpVal);
+ BuildMI(BB, dl, TII->get(Mips::ANDi), MaskedNewVal)
+ .addReg(NewVal).addImm(MaskImm);
+ BuildMI(BB, dl, TII->get(Mips::SLLV), ShiftedNewVal)
+ .addReg(ShiftAmt).addReg(MaskedNewVal);
// loop1MBB:
- // ll oldval3,0(addr)
- // and oldval4,oldval3,mask
- // bne oldval4,oldval2,exitMBB
+ // ll oldval,0(alginedaddr)
+ // and maskedoldval0,oldval,mask
+ // bne maskedoldval0,shiftedcmpval,sinkMBB
BB = loop1MBB;
- BuildMI(BB, dl, TII->get(Mips::LL), Oldval3).addReg(Addr).addImm(0);
- BuildMI(BB, dl, TII->get(Mips::AND), Oldval4).addReg(Oldval3).addReg(Mask);
+ BuildMI(BB, dl, TII->get(Mips::LL), OldVal).addReg(AlignedAddr).addImm(0);
+ BuildMI(BB, dl, TII->get(Mips::AND), MaskedOldVal0)
+ .addReg(OldVal).addReg(Mask);
BuildMI(BB, dl, TII->get(Mips::BNE))
- .addReg(Oldval4).addReg(Oldval2).addMBB(exitMBB);
- BB->addSuccessor(exitMBB);
- BB->addSuccessor(loop2MBB);
+ .addReg(MaskedOldVal0).addReg(ShiftedCmpVal).addMBB(sinkMBB);
// loop2MBB:
- // and tmp6,oldval3,mask2
- // or tmp7,tmp6,newval2
- // sc tmp7,0(addr)
- // beq tmp7,$0,loop1MBB
+ // and maskedoldval1,oldval,mask2
+ // or storeval,maskedoldval1,shiftednewval
+ // sc success,storeval,0(alignedaddr)
+ // beq success,$0,loop1MBB
BB = loop2MBB;
- BuildMI(BB, dl, TII->get(Mips::AND), Tmp6).addReg(Oldval3).addReg(Mask2);
- BuildMI(BB, dl, TII->get(Mips::OR), Tmp7).addReg(Tmp6).addReg(Newval2);
- BuildMI(BB, dl, TII->get(Mips::SC), Tmp7)
- .addReg(Tmp7).addReg(Addr).addImm(0);
+ BuildMI(BB, dl, TII->get(Mips::AND), MaskedOldVal1)
+ .addReg(OldVal).addReg(Mask2);
+ BuildMI(BB, dl, TII->get(Mips::OR), StoreVal)
+ .addReg(MaskedOldVal1).addReg(ShiftedNewVal);
+ BuildMI(BB, dl, TII->get(Mips::SC), Success)
+ .addReg(StoreVal).addReg(AlignedAddr).addImm(0);
BuildMI(BB, dl, TII->get(Mips::BEQ))
- .addReg(Tmp7).addReg(Mips::ZERO).addMBB(loop1MBB);
- BB->addSuccessor(loop1MBB);
- BB->addSuccessor(exitMBB);
+ .addReg(Success).addReg(Mips::ZERO).addMBB(loop1MBB);
- // exitMBB:
- // srl tmp8,oldval4,shift
- // sll tmp9,tmp8,24
- // sra dest,tmp9,24
- BB = exitMBB;
+ // sinkMBB:
+ // srl srlres,maskedoldval0,shiftamt
+ // sll sllres,srlres,24
+ // sra dest,sllres,24
+ BB = sinkMBB;
int64_t ShiftImm = (Size == 1) ? 24 : 16;
- // reverse order
- BuildMI(*BB, BB->begin(), dl, TII->get(Mips::SRA), Dest)
- .addReg(Tmp9).addImm(ShiftImm);
- BuildMI(*BB, BB->begin(), dl, TII->get(Mips::SLL), Tmp9)
- .addReg(Tmp8).addImm(ShiftImm);
- BuildMI(*BB, BB->begin(), dl, TII->get(Mips::SRL), Tmp8)
- .addReg(Oldval4).addReg(Shift);
+
+ BuildMI(BB, dl, TII->get(Mips::SRLV), SrlRes)
+ .addReg(ShiftAmt).addReg(MaskedOldVal0);
+ BuildMI(BB, dl, TII->get(Mips::SLL), SllRes)
+ .addReg(SrlRes).addImm(ShiftImm);
+ BuildMI(BB, dl, TII->get(Mips::SRA), Dest)
+ .addReg(SllRes).addImm(ShiftImm);
MI->eraseFromParent(); // The instruction is gone now.
- return BB;
+ return exitMBB;
}
//===----------------------------------------------------------------------===//
@@ -1267,9 +1385,9 @@ SDValue MipsTargetLowering::LowerGlobalAddress(SDValue Op,
SelectionDAG &DAG) const {
// FIXME there isn't actually debug info here
DebugLoc dl = Op.getDebugLoc();
- const GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal();
+ const GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal();
- if (getTargetMachine().getRelocationModel() != Reloc::PIC_) {
+ if (getTargetMachine().getRelocationModel() != Reloc::PIC_ && !IsN64) {
SDVTList VTs = DAG.getVTList(MVT::i32);
MipsTargetObjectFile &TLOF = (MipsTargetObjectFile&)getObjFileLowering();
@@ -1292,21 +1410,26 @@ SDValue MipsTargetLowering::LowerGlobalAddress(SDValue Op,
return DAG.getNode(ISD::ADD, dl, MVT::i32, HiPart, Lo);
}
- SDValue GA = DAG.getTargetGlobalAddress(GV, dl, MVT::i32, 0,
- MipsII::MO_GOT);
- GA = DAG.getNode(MipsISD::WrapperPIC, dl, MVT::i32, GA);
- SDValue ResNode = DAG.getLoad(MVT::i32, dl,
+ EVT ValTy = Op.getValueType();
+ bool HasGotOfst = (GV->hasInternalLinkage() ||
+ (GV->hasLocalLinkage() && !isa<Function>(GV)));
+ unsigned GotFlag = IsN64 ?
+ (HasGotOfst ? MipsII::MO_GOT_PAGE : MipsII::MO_GOT_DISP) :
+ MipsII::MO_GOT;
+ SDValue GA = DAG.getTargetGlobalAddress(GV, dl, ValTy, 0, GotFlag);
+ GA = DAG.getNode(MipsISD::WrapperPIC, dl, ValTy, GA);
+ SDValue ResNode = DAG.getLoad(ValTy, dl,
DAG.getEntryNode(), GA, MachinePointerInfo(),
false, false, 0);
// On functions and global targets not internal linked only
// a load from got/GP is necessary for PIC to work.
- if (!GV->hasInternalLinkage() &&
- (!GV->hasLocalLinkage() || isa<Function>(GV)))
+ if (!HasGotOfst)
return ResNode;
- SDValue GALo = DAG.getTargetGlobalAddress(GV, dl, MVT::i32, 0,
- MipsII::MO_ABS_LO);
- SDValue Lo = DAG.getNode(MipsISD::Lo, dl, MVT::i32, GALo);
- return DAG.getNode(ISD::ADD, dl, MVT::i32, ResNode, Lo);
+ SDValue GALo = DAG.getTargetGlobalAddress(GV, dl, ValTy, 0,
+ IsN64 ? MipsII::MO_GOT_OFST :
+ MipsII::MO_ABS_LO);
+ SDValue Lo = DAG.getNode(MipsISD::Lo, dl, ValTy, GALo);
+ return DAG.getNode(ISD::ADD, dl, ValTy, ResNode, Lo);
}
SDValue MipsTargetLowering::LowerBlockAddress(SDValue Op,
@@ -1361,11 +1484,11 @@ LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const
ArgListTy Args;
ArgListEntry Entry;
Entry.Node = Argument;
- Entry.Ty = (const Type *) Type::getInt32Ty(*DAG.getContext());
+ Entry.Ty = (Type *) Type::getInt32Ty(*DAG.getContext());
Args.push_back(Entry);
std::pair<SDValue, SDValue> CallResult =
LowerCallTo(DAG.getEntryNode(),
- (const Type *) Type::getInt32Ty(*DAG.getContext()),
+ (Type *) Type::getInt32Ty(*DAG.getContext()),
false, false, false, false, 0, CallingConv::C, false, true,
DAG.getExternalSymbol("__tls_get_addr", PtrVT), Args, DAG,
dl);
@@ -1557,6 +1680,25 @@ LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const {
return FrameAddr;
}
+// TODO: set SType according to the desired memory barrier behavior.
+SDValue MipsTargetLowering::LowerMEMBARRIER(SDValue Op,
+ SelectionDAG& DAG) const {
+ unsigned SType = 0;
+ DebugLoc dl = Op.getDebugLoc();
+ return DAG.getNode(MipsISD::Sync, dl, MVT::Other, Op.getOperand(0),
+ DAG.getConstant(SType, MVT::i32));
+}
+
+SDValue MipsTargetLowering::LowerATOMIC_FENCE(SDValue Op,
+ SelectionDAG& DAG) const {
+ // FIXME: Need pseudo-fence for 'singlethread' fences
+ // FIXME: Set SType for weaker fences where supported/appropriate.
+ unsigned SType = 0;
+ DebugLoc dl = Op.getDebugLoc();
+ return DAG.getNode(MipsISD::Sync, dl, MVT::Other, Op.getOperand(0),
+ DAG.getConstant(SType, MVT::i32));
+}
+
//===----------------------------------------------------------------------===//
// Calling Convention Implementation
//===----------------------------------------------------------------------===//
@@ -1679,55 +1821,109 @@ static const unsigned O32IntRegs[] = {
Mips::A0, Mips::A1, Mips::A2, Mips::A3
};
+// Return next O32 integer argument register.
+static unsigned getNextIntArgReg(unsigned Reg) {
+ assert((Reg == Mips::A0) || (Reg == Mips::A2));
+ return (Reg == Mips::A0) ? Mips::A1 : Mips::A3;
+}
+
// Write ByVal Arg to arg registers and stack.
static void
-WriteByValArg(SDValue& Chain, DebugLoc dl,
+WriteByValArg(SDValue& ByValChain, SDValue Chain, DebugLoc dl,
SmallVector<std::pair<unsigned, SDValue>, 16>& RegsToPass,
SmallVector<SDValue, 8>& MemOpChains, int& LastFI,
MachineFrameInfo *MFI, SelectionDAG &DAG, SDValue Arg,
const CCValAssign &VA, const ISD::ArgFlagsTy& Flags,
- MVT PtrType) {
- unsigned FirstWord = VA.getLocMemOffset() / 4;
- unsigned NumWords = (Flags.getByValSize() + 3) / 4;
- unsigned LastWord = FirstWord + NumWords;
- unsigned CurWord;
-
- // copy the first 4 words of byval arg to registers A0 - A3
- for (CurWord = FirstWord; CurWord < std::min(LastWord, O32IntRegsSize);
- ++CurWord) {
+ MVT PtrType, bool isLittle) {
+ unsigned LocMemOffset = VA.getLocMemOffset();
+ unsigned Offset = 0;
+ uint32_t RemainingSize = Flags.getByValSize();
+ unsigned ByValAlign = Flags.getByValAlign();
+
+ // Copy the first 4 words of byval arg to registers A0 - A3.
+ // FIXME: Use a stricter alignment if it enables better optimization in passes
+ // run later.
+ for (; RemainingSize >= 4 && LocMemOffset < 4 * 4;
+ Offset += 4, RemainingSize -= 4, LocMemOffset += 4) {
SDValue LoadPtr = DAG.getNode(ISD::ADD, dl, MVT::i32, Arg,
- DAG.getConstant((CurWord - FirstWord) * 4,
- MVT::i32));
+ DAG.getConstant(Offset, MVT::i32));
SDValue LoadVal = DAG.getLoad(MVT::i32, dl, Chain, LoadPtr,
MachinePointerInfo(),
- false, false, 0);
+ false, false, std::min(ByValAlign,
+ (unsigned )4));
MemOpChains.push_back(LoadVal.getValue(1));
- unsigned DstReg = O32IntRegs[CurWord];
+ unsigned DstReg = O32IntRegs[LocMemOffset / 4];
RegsToPass.push_back(std::make_pair(DstReg, LoadVal));
}
- // copy remaining part of byval arg to stack.
- if (CurWord < LastWord) {
- unsigned SizeInBytes = (LastWord - CurWord) * 4;
- SDValue Src = DAG.getNode(ISD::ADD, dl, MVT::i32, Arg,
- DAG.getConstant((CurWord - FirstWord) * 4,
- MVT::i32));
- LastFI = MFI->CreateFixedObject(SizeInBytes, CurWord * 4, true);
- SDValue Dst = DAG.getFrameIndex(LastFI, PtrType);
- Chain = DAG.getMemcpy(Chain, dl, Dst, Src,
- DAG.getConstant(SizeInBytes, MVT::i32),
- /*Align*/4,
- /*isVolatile=*/false, /*AlwaysInline=*/false,
- MachinePointerInfo(0), MachinePointerInfo(0));
- MemOpChains.push_back(Chain);
+ if (RemainingSize == 0)
+ return;
+
+ // If there still is a register available for argument passing, write the
+ // remaining part of the structure to it using subword loads and shifts.
+ if (LocMemOffset < 4 * 4) {
+ assert(RemainingSize <= 3 && RemainingSize >= 1 &&
+ "There must be one to three bytes remaining.");
+ unsigned LoadSize = (RemainingSize == 3 ? 2 : RemainingSize);
+ SDValue LoadPtr = DAG.getNode(ISD::ADD, dl, MVT::i32, Arg,
+ DAG.getConstant(Offset, MVT::i32));
+ unsigned Alignment = std::min(ByValAlign, (unsigned )4);
+ SDValue LoadVal = DAG.getExtLoad(ISD::ZEXTLOAD, dl, MVT::i32, Chain,
+ LoadPtr, MachinePointerInfo(),
+ MVT::getIntegerVT(LoadSize * 8), false,
+ false, Alignment);
+ MemOpChains.push_back(LoadVal.getValue(1));
+
+ // If target is big endian, shift it to the most significant half-word or
+ // byte.
+ if (!isLittle)
+ LoadVal = DAG.getNode(ISD::SHL, dl, MVT::i32, LoadVal,
+ DAG.getConstant(32 - LoadSize * 8, MVT::i32));
+
+ Offset += LoadSize;
+ RemainingSize -= LoadSize;
+
+ // Read second subword if necessary.
+ if (RemainingSize != 0) {
+ assert(RemainingSize == 1 && "There must be one byte remaining.");
+ LoadPtr = DAG.getNode(ISD::ADD, dl, MVT::i32, Arg,
+ DAG.getConstant(Offset, MVT::i32));
+ unsigned Alignment = std::min(ByValAlign, (unsigned )2);
+ SDValue Subword = DAG.getExtLoad(ISD::ZEXTLOAD, dl, MVT::i32, Chain,
+ LoadPtr, MachinePointerInfo(),
+ MVT::i8, false, false, Alignment);
+ MemOpChains.push_back(Subword.getValue(1));
+ // Insert the loaded byte to LoadVal.
+ // FIXME: Use INS if supported by target.
+ unsigned ShiftAmt = isLittle ? 16 : 8;
+ SDValue Shift = DAG.getNode(ISD::SHL, dl, MVT::i32, Subword,
+ DAG.getConstant(ShiftAmt, MVT::i32));
+ LoadVal = DAG.getNode(ISD::OR, dl, MVT::i32, LoadVal, Shift);
+ }
+
+ unsigned DstReg = O32IntRegs[LocMemOffset / 4];
+ RegsToPass.push_back(std::make_pair(DstReg, LoadVal));
+ return;
}
+
+ // Create a fixed object on stack at offset LocMemOffset and copy
+ // remaining part of byval arg to it using memcpy.
+ SDValue Src = DAG.getNode(ISD::ADD, dl, MVT::i32, Arg,
+ DAG.getConstant(Offset, MVT::i32));
+ LastFI = MFI->CreateFixedObject(RemainingSize, LocMemOffset, true);
+ SDValue Dst = DAG.getFrameIndex(LastFI, PtrType);
+ ByValChain = DAG.getMemcpy(ByValChain, dl, Dst, Src,
+ DAG.getConstant(RemainingSize, MVT::i32),
+ std::min(ByValAlign, (unsigned)4),
+ /*isVolatile=*/false, /*AlwaysInline=*/false,
+ MachinePointerInfo(0), MachinePointerInfo(0));
}
/// LowerCall - functions arguments are copied from virtual regs to
/// (physical regs)/(stack frame), CALLSEQ_START and CALLSEQ_END are emitted.
/// TODO: isTailCall.
SDValue
-MipsTargetLowering::LowerCall(SDValue Chain, SDValue Callee,
+MipsTargetLowering::LowerCall(SDValue InChain, SDValue Callee,
CallingConv::ID CallConv, bool isVarArg,
bool &isTailCall,
const SmallVectorImpl<ISD::OutputArg> &Outs,
@@ -1757,8 +1953,13 @@ MipsTargetLowering::LowerCall(SDValue Chain, SDValue Callee,
// Get a count of how many bytes are to be pushed on the stack.
unsigned NextStackOffset = CCInfo.getNextStackOffset();
- Chain = DAG.getCALLSEQ_START(Chain, DAG.getIntPtrConstant(NextStackOffset,
- true));
+ // Chain is the output chain of the last Load/Store or CopyToReg node.
+ // ByValChain is the output chain of the last Memcpy node created for copying
+ // byval arguments to the stack.
+ SDValue Chain, CallSeqStart, ByValChain;
+ SDValue NextStackOffsetVal = DAG.getIntPtrConstant(NextStackOffset, true);
+ Chain = CallSeqStart = DAG.getCALLSEQ_START(InChain, NextStackOffsetVal);
+ ByValChain = InChain;
// If this is the first call, create a stack frame object that points to
// a location to which .cprestore saves $gp.
@@ -1818,8 +2019,10 @@ MipsTargetLowering::LowerCall(SDValue Chain, SDValue Callee,
Arg, DAG.getConstant(1, MVT::i32));
if (!Subtarget->isLittle())
std::swap(Lo, Hi);
- RegsToPass.push_back(std::make_pair(VA.getLocReg(), Lo));
- RegsToPass.push_back(std::make_pair(VA.getLocReg()+1, Hi));
+ unsigned LocRegLo = VA.getLocReg();
+ unsigned LocRegHigh = getNextIntArgReg(LocRegLo);
+ RegsToPass.push_back(std::make_pair(LocRegLo, Lo));
+ RegsToPass.push_back(std::make_pair(LocRegHigh, Hi));
continue;
}
}
@@ -1852,8 +2055,8 @@ MipsTargetLowering::LowerCall(SDValue Chain, SDValue Callee,
"No support for ByVal args by ABIs other than O32 yet.");
assert(Flags.getByValSize() &&
"ByVal args of size 0 should have been ignored by front-end.");
- WriteByValArg(Chain, dl, RegsToPass, MemOpChains, LastFI, MFI, DAG, Arg,
- VA, Flags, getPointerTy());
+ WriteByValArg(ByValChain, Chain, dl, RegsToPass, MemOpChains, LastFI, MFI,
+ DAG, Arg, VA, Flags, getPointerTy(), Subtarget->isLittle());
continue;
}
@@ -1875,6 +2078,12 @@ MipsTargetLowering::LowerCall(SDValue Chain, SDValue Callee,
if (LastFI)
MipsFI->extendOutArgFIRange(FirstFI, LastFI);
+ // If a memcpy has been created to copy a byval arg to a stack, replace the
+ // chain input of CallSeqStart with ByValChain.
+ if (InChain != ByValChain)
+ DAG.UpdateNodeOperands(CallSeqStart.getNode(), ByValChain,
+ NextStackOffsetVal);
+
// Transform all store nodes into one single node because all store
// nodes are independent of each other.
if (!MemOpChains.empty())
@@ -2071,12 +2280,13 @@ MipsTargetLowering::LowerFormalArguments(SDValue Chain,
if (RegVT == MVT::i32)
RC = Mips::CPURegsRegisterClass;
+ else if (RegVT == MVT::i64)
+ RC = Mips::CPU64RegsRegisterClass;
else if (RegVT == MVT::f32)
RC = Mips::FGR32RegisterClass;
- else if (RegVT == MVT::f64) {
- if (!Subtarget->isSingleFloat())
- RC = Mips::AFGR64RegisterClass;
- } else
+ else if (RegVT == MVT::f64)
+ RC = HasMips64 ? Mips::FGR64RegisterClass : Mips::AFGR64RegisterClass;
+ else
llvm_unreachable("RegVT not supported by FormalArguments Lowering");
// Transform the arguments stored on
@@ -2105,7 +2315,7 @@ MipsTargetLowering::LowerFormalArguments(SDValue Chain,
ArgValue = DAG.getNode(ISD::BITCAST, dl, MVT::f32, ArgValue);
if (RegVT == MVT::i32 && VA.getValVT() == MVT::f64) {
unsigned Reg2 = AddLiveIn(DAG.getMachineFunction(),
- VA.getLocReg()+1, RC);
+ getNextIntArgReg(ArgReg), RC);
SDValue ArgValue2 = DAG.getCopyFromReg(Chain, dl, Reg2, RegVT);
if (!Subtarget->isLittle())
std::swap(ArgValue, ArgValue2);
@@ -2313,7 +2523,7 @@ MipsTargetLowering::getSingleConstraintMatchWeight(
// but allow it at the lowest weight.
if (CallOperandVal == NULL)
return CW_Default;
- const Type *type = CallOperandVal->getType();
+ Type *type = CallOperandVal->getType();
// Look at the constraint type.
switch (*constraint) {
default:
diff --git a/contrib/llvm/lib/Target/Mips/MipsISelLowering.h b/contrib/llvm/lib/Target/Mips/MipsISelLowering.h
index bda26a2..4be3fed5 100644
--- a/contrib/llvm/lib/Target/Mips/MipsISelLowering.h
+++ b/contrib/llvm/lib/Target/Mips/MipsISelLowering.h
@@ -81,7 +81,12 @@ namespace llvm {
WrapperPIC,
- DynAlloc
+ DynAlloc,
+
+ Sync,
+
+ Ext,
+ Ins
};
}
@@ -93,6 +98,8 @@ namespace llvm {
public:
explicit MipsTargetLowering(MipsTargetMachine &TM);
+ virtual bool allowsUnalignedMemoryAccesses (EVT VT) const;
+
/// LowerOperation - Provide custom lowering hooks for some operations.
virtual SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const;
@@ -101,13 +108,14 @@ namespace llvm {
virtual const char *getTargetNodeName(unsigned Opcode) const;
/// getSetCCResultType - get the ISD::SETCC result ValueType
- MVT::SimpleValueType getSetCCResultType(EVT VT) const;
+ EVT getSetCCResultType(EVT VT) const;
virtual SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const;
private:
// Subtarget Info
const MipsSubtarget *Subtarget;
-
+
+ bool HasMips64, IsN64;
// Lower Operand helpers
SDValue LowerCallResult(SDValue Chain, SDValue InFlag,
@@ -128,6 +136,8 @@ namespace llvm {
SDValue LowerVASTART(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerFCOPYSIGN(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerMEMBARRIER(SDValue Op, SelectionDAG& DAG) const;
+ SDValue LowerATOMIC_FENCE(SDValue Op, SelectionDAG& DAG) const;
virtual SDValue
LowerFormalArguments(SDValue Chain,
diff --git a/contrib/llvm/lib/Target/Mips/MipsInstrFPU.td b/contrib/llvm/lib/Target/Mips/MipsInstrFPU.td
index 021c167..2fb9d18 100644
--- a/contrib/llvm/lib/Target/Mips/MipsInstrFPU.td
+++ b/contrib/llvm/lib/Target/Mips/MipsInstrFPU.td
@@ -27,7 +27,7 @@
def SDT_MipsFPBrcond : SDTypeProfile<0, 2, [SDTCisInt<0>,
SDTCisVT<1, OtherVT>]>;
def SDT_MipsFPCmp : SDTypeProfile<0, 3, [SDTCisSameAs<0, 1>, SDTCisFP<1>,
- SDTCisInt<2>]>;
+ SDTCisVT<2, i32>]>;
def SDT_MipsCMovFP : SDTypeProfile<1, 2, [SDTCisSameAs<0, 1>,
SDTCisSameAs<1, 2>]>;
def SDT_MipsBuildPairF64 : SDTypeProfile<1, 2, [SDTCisVT<0, f64>,
@@ -35,12 +35,11 @@ def SDT_MipsBuildPairF64 : SDTypeProfile<1, 2, [SDTCisVT<0, f64>,
SDTCisSameAs<1, 2>]>;
def SDT_MipsExtractElementF64 : SDTypeProfile<1, 2, [SDTCisVT<0, i32>,
SDTCisVT<1, f64>,
- SDTCisVT<0, i32>]>;
+ SDTCisVT<2, i32>]>;
def MipsFPCmp : SDNode<"MipsISD::FPCmp", SDT_MipsFPCmp, [SDNPOutGlue]>;
def MipsCMovFP_T : SDNode<"MipsISD::CMovFP_T", SDT_MipsCMovFP, [SDNPInGlue]>;
def MipsCMovFP_F : SDNode<"MipsISD::CMovFP_F", SDT_MipsCMovFP, [SDNPInGlue]>;
-def MipsFPRound : SDNode<"MipsISD::FPRound", SDTFPRoundOp, [SDNPOptInGlue]>;
def MipsFPBrcond : SDNode<"MipsISD::FPBrcond", SDT_MipsFPBrcond,
[SDNPHasChain, SDNPOptInGlue]>;
def MipsBuildPairF64 : SDNode<"MipsISD::BuildPairF64", SDT_MipsBuildPairF64>;
@@ -55,10 +54,10 @@ let PrintMethod = "printFCCOperand" in
// Feature predicates.
//===----------------------------------------------------------------------===//
-def In32BitMode : Predicate<"!Subtarget.isFP64bit()">;
+def IsFP64bit : Predicate<"Subtarget.isFP64bit()">;
+def NotFP64bit : Predicate<"!Subtarget.isFP64bit()">;
def IsSingleFloat : Predicate<"Subtarget.isSingleFloat()">;
def IsNotSingleFloat : Predicate<"!Subtarget.isSingleFloat()">;
-def IsNotMipsI : Predicate<"!Subtarget.isMips1()">;
//===----------------------------------------------------------------------===//
// Instruction Class Templates
@@ -74,97 +73,87 @@ def IsNotMipsI : Predicate<"!Subtarget.isMips1()">;
// Only S32 and D32 are supported right now.
//===----------------------------------------------------------------------===//
-multiclass FFR1_1<bits<6> funct, string asmstr>
-{
- def _S32 : FFR<0x11, funct, 0x0, (outs FGR32:$fd), (ins FGR32:$fs),
- !strconcat(asmstr, ".s\t$fd, $fs"), []>;
-
- def _D32 : FFR<0x11, funct, 0x1, (outs FGR32:$fd), (ins AFGR64:$fs),
- !strconcat(asmstr, ".d\t$fd, $fs"), []>, Requires<[In32BitMode]>;
+// FP load.
+class FPLoad<bits<6> op, string opstr, PatFrag FOp, RegisterClass RC,
+ Operand MemOpnd>:
+ FFI<op, (outs RC:$ft), (ins MemOpnd:$base),
+ !strconcat(opstr, "\t$ft, $base"), [(set RC:$ft, (FOp addr:$base))]>;
+
+// FP store.
+class FPStore<bits<6> op, string opstr, PatFrag FOp, RegisterClass RC,
+ Operand MemOpnd>:
+ FFI<op, (outs), (ins RC:$ft, MemOpnd:$base),
+ !strconcat(opstr, "\t$ft, $base"), [(store RC:$ft, addr:$base)]>;
+
+// Instructions that convert an FP value to 32-bit fixed point.
+multiclass FFR1_W_M<bits<6> funct, string opstr> {
+ def _S : FFR1<funct, 16, opstr, "w.s", FGR32, FGR32>;
+ def _D32 : FFR1<funct, 17, opstr, "w.d", FGR32, AFGR64>,
+ Requires<[NotFP64bit]>;
+ def _D64 : FFR1<funct, 17, opstr, "w.d", FGR32, FGR64>,
+ Requires<[IsFP64bit]>;
}
-multiclass FFR1_2<bits<6> funct, string asmstr, SDNode FOp>
-{
- def _S32 : FFR<0x11, funct, 0x0, (outs FGR32:$fd), (ins FGR32:$fs),
- !strconcat(asmstr, ".s\t$fd, $fs"),
- [(set FGR32:$fd, (FOp FGR32:$fs))]>;
-
- def _D32 : FFR<0x11, funct, 0x1, (outs AFGR64:$fd), (ins AFGR64:$fs),
- !strconcat(asmstr, ".d\t$fd, $fs"),
- [(set AFGR64:$fd, (FOp AFGR64:$fs))]>, Requires<[In32BitMode]>;
+// Instructions that convert an FP value to 64-bit fixed point.
+let Predicates = [IsFP64bit] in
+multiclass FFR1_L_M<bits<6> funct, string opstr> {
+ def _S : FFR1<funct, 16, opstr, "l.s", FGR64, FGR32>;
+ def _D64 : FFR1<funct, 17, opstr, "l.d", FGR64, FGR64>;
}
-class FFR1_3<bits<6> funct, bits<5> fmt, RegisterClass RcSrc,
- RegisterClass RcDst, string asmstr>:
- FFR<0x11, funct, fmt, (outs RcSrc:$fd), (ins RcDst:$fs),
- !strconcat(asmstr, "\t$fd, $fs"), []>;
-
+// FP-to-FP conversion instructions.
+multiclass FFR1P_M<bits<6> funct, string opstr, SDNode OpNode> {
+ def _S : FFR1P<funct, 16, opstr, "s", FGR32, FGR32, OpNode>;
+ def _D32 : FFR1P<funct, 17, opstr, "d", AFGR64, AFGR64, OpNode>,
+ Requires<[NotFP64bit]>;
+ def _D64 : FFR1P<funct, 17, opstr, "d", FGR64, FGR64, OpNode>,
+ Requires<[IsFP64bit]>;
+}
-multiclass FFR1_4<bits<6> funct, string asmstr, SDNode FOp, bit isComm = 0> {
+multiclass FFR2P_M<bits<6> funct, string opstr, SDNode OpNode, bit isComm = 0> {
let isCommutable = isComm in {
- def _S32 : FFR<0x11, funct, 0x0, (outs FGR32:$fd),
- (ins FGR32:$fs, FGR32:$ft),
- !strconcat(asmstr, ".s\t$fd, $fs, $ft"),
- [(set FGR32:$fd, (FOp FGR32:$fs, FGR32:$ft))]>;
-
- def _D32 : FFR<0x11, funct, 0x1, (outs AFGR64:$fd),
- (ins AFGR64:$fs, AFGR64:$ft),
- !strconcat(asmstr, ".d\t$fd, $fs, $ft"),
- [(set AFGR64:$fd, (FOp AFGR64:$fs, AFGR64:$ft))]>,
- Requires<[In32BitMode]>;
+ def _S : FFR2P<funct, 16, opstr, "s", FGR32, OpNode>;
+ def _D32 : FFR2P<funct, 17, opstr, "d", AFGR64, OpNode>,
+ Requires<[NotFP64bit]>;
+ def _D64 : FFR2P<funct, 17, opstr, "d", FGR64, OpNode>,
+ Requires<[IsFP64bit]>;
}
}
//===----------------------------------------------------------------------===//
// Floating Point Instructions
//===----------------------------------------------------------------------===//
+defm ROUND_W : FFR1_W_M<0xc, "round">;
+defm ROUND_L : FFR1_L_M<0x8, "round">;
+defm TRUNC_W : FFR1_W_M<0xd, "trunc">;
+defm TRUNC_L : FFR1_L_M<0x9, "trunc">;
+defm CEIL_W : FFR1_W_M<0xe, "ceil">;
+defm CEIL_L : FFR1_L_M<0xa, "ceil">;
+defm FLOOR_W : FFR1_W_M<0xf, "floor">;
+defm FLOOR_L : FFR1_L_M<0xb, "floor">;
+defm CVT_W : FFR1_W_M<0x24, "cvt">;
+defm CVT_L : FFR1_L_M<0x25, "cvt">;
+
+def CVT_S_W : FFR1<0x20, 20, "cvt", "s.w", FGR32, FGR32>;
+
+let Predicates = [NotFP64bit] in {
+ def CVT_S_D32 : FFR1<0x20, 17, "cvt", "s.d", FGR32, AFGR64>;
+ def CVT_D32_W : FFR1<0x21, 20, "cvt", "d.w", AFGR64, FGR32>;
+ def CVT_D32_S : FFR1<0x21, 16, "cvt", "d.s", AFGR64, FGR32>;
+}
-let ft = 0 in {
- defm FLOOR_W : FFR1_1<0b001111, "floor.w">;
- defm CEIL_W : FFR1_1<0b001110, "ceil.w">;
- defm ROUND_W : FFR1_1<0b001100, "round.w">;
- defm TRUNC_W : FFR1_1<0b001101, "trunc.w">;
- defm CVTW : FFR1_1<0b100100, "cvt.w">;
-
- defm FABS : FFR1_2<0b000101, "abs", fabs>;
- defm FNEG : FFR1_2<0b000111, "neg", fneg>;
- defm FSQRT : FFR1_2<0b000100, "sqrt", fsqrt>;
-
- /// Convert to Single Precison
- def CVTS_W32 : FFR1_3<0b100000, 0x2, FGR32, FGR32, "cvt.s.w">;
-
- let Predicates = [IsNotSingleFloat] in {
- /// Ceil to long signed integer
- def CEIL_LS : FFR1_3<0b001010, 0x0, FGR32, FGR32, "ceil.l">;
- def CEIL_LD : FFR1_3<0b001010, 0x1, AFGR64, AFGR64, "ceil.l">;
-
- /// Round to long signed integer
- def ROUND_LS : FFR1_3<0b001000, 0x0, FGR32, FGR32, "round.l">;
- def ROUND_LD : FFR1_3<0b001000, 0x1, AFGR64, AFGR64, "round.l">;
-
- /// Floor to long signed integer
- def FLOOR_LS : FFR1_3<0b001011, 0x0, FGR32, FGR32, "floor.l">;
- def FLOOR_LD : FFR1_3<0b001011, 0x1, AFGR64, AFGR64, "floor.l">;
-
- /// Trunc to long signed integer
- def TRUNC_LS : FFR1_3<0b001001, 0x0, FGR32, FGR32, "trunc.l">;
- def TRUNC_LD : FFR1_3<0b001001, 0x1, AFGR64, AFGR64, "trunc.l">;
-
- /// Convert to long signed integer
- def CVTL_S : FFR1_3<0b100101, 0x0, FGR32, FGR32, "cvt.l">;
- def CVTL_D : FFR1_3<0b100101, 0x1, AFGR64, AFGR64, "cvt.l">;
-
- /// Convert to Double Precison
- def CVTD_S32 : FFR1_3<0b100001, 0x0, AFGR64, FGR32, "cvt.d.s">;
- def CVTD_W32 : FFR1_3<0b100001, 0x2, AFGR64, FGR32, "cvt.d.w">;
- def CVTD_L32 : FFR1_3<0b100001, 0x3, AFGR64, AFGR64, "cvt.d.l">;
-
- /// Convert to Single Precison
- def CVTS_D32 : FFR1_3<0b100000, 0x1, FGR32, AFGR64, "cvt.s.d">;
- def CVTS_L32 : FFR1_3<0b100000, 0x3, FGR32, AFGR64, "cvt.s.l">;
- }
+let Predicates = [IsFP64bit] in {
+ def CVT_S_D64 : FFR1<0x20, 17, "cvt", "s.d", FGR32, FGR64>;
+ def CVT_S_L : FFR1<0x20, 21, "cvt", "s.l", FGR32, FGR64>;
+ def CVT_D64_W : FFR1<0x21, 20, "cvt", "d.w", FGR64, FGR32>;
+ def CVT_D64_S : FFR1<0x21, 16, "cvt", "d.s", FGR64, FGR32>;
+ def CVT_D64_L : FFR1<0x21, 21, "cvt", "d.l", FGR64, FGR64>;
}
+defm FABS : FFR1P_M<0x5, "abs", fabs>;
+defm FNEG : FFR1P_M<0x7, "neg", fneg>;
+defm FSQRT : FFR1P_M<0x4, "sqrt", fsqrt>;
+
// The odd-numbered registers are only referenced when doing loads,
// stores, and moves between floating-point and integer registers.
// When defining instructions, we reference all 32-bit registers,
@@ -178,37 +167,46 @@ let fd = 0 in {
"ctc1\t$fs, $rt", []>;
def MFC1 : FFR<0x11, 0x00, 0x00, (outs CPURegs:$rt), (ins FGR32:$fs),
- "mfc1\t$rt, $fs", []>;
+ "mfc1\t$rt, $fs",
+ [(set CPURegs:$rt, (bitconvert FGR32:$fs))]>;
def MTC1 : FFR<0x11, 0x00, 0x04, (outs FGR32:$fs), (ins CPURegs:$rt),
- "mtc1\t$rt, $fs", []>;
+ "mtc1\t$rt, $fs",
+ [(set FGR32:$fs, (bitconvert CPURegs:$rt))]>;
}
-def FMOV_S32 : FFR<0x11, 0b000110, 0x0, (outs FGR32:$fd), (ins FGR32:$fs),
- "mov.s\t$fd, $fs", []>;
-def FMOV_D32 : FFR<0x11, 0b000110, 0x1, (outs AFGR64:$fd), (ins AFGR64:$fs),
- "mov.d\t$fd, $fs", []>;
+def FMOV_S : FFR1<0x6, 16, "mov", "s", FGR32, FGR32>;
+def FMOV_D32 : FFR1<0x6, 17, "mov", "d", AFGR64, AFGR64>,
+ Requires<[NotFP64bit]>;
+def FMOV_D64 : FFR1<0x6, 17, "mov", "d", FGR64, FGR64>,
+ Requires<[IsFP64bit]>;
/// Floating Point Memory Instructions
-let Predicates = [IsNotSingleFloat, IsNotMipsI] in {
- def LDC1 : FFI<0b110101, (outs AFGR64:$ft), (ins mem:$addr),
- "ldc1\t$ft, $addr", [(set AFGR64:$ft, (load addr:$addr))]>;
-
- def SDC1 : FFI<0b111101, (outs), (ins AFGR64:$ft, mem:$addr),
- "sdc1\t$ft, $addr", [(store AFGR64:$ft, addr:$addr)]>;
+let Predicates = [IsN64] in {
+ def LWC1_P8 : FPLoad<0x31, "lwc1", load, FGR32, mem64>;
+ def SWC1_P8 : FPStore<0x39, "swc1", store, FGR32, mem64>;
+ def LDC164_P8 : FPLoad<0x35, "ldc1", load, FGR64, mem64>;
+ def SDC164_P8 : FPStore<0x3d, "sdc1", store, FGR64, mem64>;
}
-// LWC1 and SWC1 can always be emitted with odd registers.
-def LWC1 : FFI<0b110001, (outs FGR32:$ft), (ins mem:$addr), "lwc1\t$ft, $addr",
- [(set FGR32:$ft, (load addr:$addr))]>;
-def SWC1 : FFI<0b111001, (outs), (ins FGR32:$ft, mem:$addr),
- "swc1\t$ft, $addr", [(store FGR32:$ft, addr:$addr)]>;
+let Predicates = [NotN64] in {
+ def LWC1 : FPLoad<0x31, "lwc1", load, FGR32, mem>;
+ def SWC1 : FPStore<0x39, "swc1", store, FGR32, mem>;
+ let Predicates = [HasMips64] in {
+ def LDC164 : FPLoad<0x35, "ldc1", load, FGR64, mem>;
+ def SDC164 : FPStore<0x3d, "sdc1", store, FGR64, mem>;
+ }
+ let Predicates = [NotMips64] in {
+ def LDC1 : FPLoad<0x35, "ldc1", load, AFGR64, mem>;
+ def SDC1 : FPStore<0x3d, "sdc1", store, AFGR64, mem>;
+ }
+}
/// Floating-point Aritmetic
-defm FADD : FFR1_4<0x10, "add", fadd, 1>;
-defm FDIV : FFR1_4<0x03, "div", fdiv>;
-defm FMUL : FFR1_4<0x02, "mul", fmul, 1>;
-defm FSUB : FFR1_4<0x01, "sub", fsub>;
+defm FADD : FFR2P_M<0x10, "add", fadd, 1>;
+defm FDIV : FFR2P_M<0x03, "div", fdiv>;
+defm FMUL : FFR2P_M<0x02, "mul", fmul, 1>;
+defm FSUB : FFR2P_M<0x01, "sub", fsub>;
//===----------------------------------------------------------------------===//
// Floating Point Branch Codes
@@ -217,8 +215,6 @@ defm FSUB : FFR1_4<0x01, "sub", fsub>;
// They must be kept in synch.
def MIPS_BRANCH_F : PatLeaf<(i32 0)>;
def MIPS_BRANCH_T : PatLeaf<(i32 1)>;
-def MIPS_BRANCH_FL : PatLeaf<(i32 2)>;
-def MIPS_BRANCH_TL : PatLeaf<(i32 3)>;
/// Floating Point Branch of False/True (Likely)
let isBranch=1, isTerminator=1, hasDelaySlot=1, base=0x8, Uses=[FCR31] in
@@ -228,8 +224,6 @@ let isBranch=1, isTerminator=1, hasDelaySlot=1, base=0x8, Uses=[FCR31] in
def BC1F : FBRANCH<MIPS_BRANCH_F, "bc1f">;
def BC1T : FBRANCH<MIPS_BRANCH_T, "bc1t">;
-def BC1FL : FBRANCH<MIPS_BRANCH_FL, "bc1fl">;
-def BC1TL : FBRANCH<MIPS_BRANCH_TL, "bc1tl">;
//===----------------------------------------------------------------------===//
// Floating Point Flag Conditions
@@ -254,7 +248,7 @@ def MIPS_FCOND_LE : PatLeaf<(i32 14)>;
def MIPS_FCOND_NGT : PatLeaf<(i32 15)>;
/// Floating Point Compare
-let hasDelaySlot = 1, Defs=[FCR31] in {
+let Defs=[FCR31] in {
def FCMP_S32 : FCC<0x0, (outs), (ins FGR32:$fs, FGR32:$ft, condcode:$cc),
"c.$cc.s\t$fs, $ft",
[(MipsFPCmp FGR32:$fs, FGR32:$ft, imm:$cc)]>;
@@ -262,7 +256,7 @@ let hasDelaySlot = 1, Defs=[FCR31] in {
def FCMP_D32 : FCC<0x1, (outs), (ins AFGR64:$fs, AFGR64:$ft, condcode:$cc),
"c.$cc.d\t$fs, $ft",
[(MipsFPCmp AFGR64:$fs, AFGR64:$ft, imm:$cc)]>,
- Requires<[In32BitMode]>;
+ Requires<[NotFP64bit]>;
}
@@ -280,7 +274,7 @@ class CondMovIntFP<RegisterClass RC, bits<5> fmt, bits<6> func,
def MOVZ_S : CondMovIntFP<FGR32, 16, 18, "movz.s">;
def MOVN_S : CondMovIntFP<FGR32, 16, 19, "movn.s">;
-let Predicates = [In32BitMode] in {
+let Predicates = [NotFP64bit] in {
def MOVZ_D : CondMovIntFP<AFGR64, 17, 18, "movz.d">;
def MOVN_D : CondMovIntFP<AFGR64, 17, 19, "movn.d">;
}
@@ -288,7 +282,7 @@ let Predicates = [In32BitMode] in {
defm : MovzPats<FGR32, MOVZ_S>;
defm : MovnPats<FGR32, MOVN_S>;
-let Predicates = [In32BitMode] in {
+let Predicates = [NotFP64bit] in {
defm : MovzPats<AFGR64, MOVZ_D>;
defm : MovnPats<AFGR64, MOVN_D>;
}
@@ -313,7 +307,7 @@ def MOVF : CondMovFPInt<MipsCMovFP_F, 0, "movf">;
def MOVT_S : CondMovFPFP<FGR32, MipsCMovFP_T, 16, 1, "movt.s">;
def MOVF_S : CondMovFPFP<FGR32, MipsCMovFP_F, 16, 0, "movf.s">;
-let Predicates = [In32BitMode] in {
+let Predicates = [NotFP64bit] in {
def MOVT_D : CondMovFPFP<AFGR64, MipsCMovFP_T, 17, 1, "movt.d">;
def MOVF_D : CondMovFPFP<AFGR64, MipsCMovFP_F, 17, 0, "movf.d">;
}
@@ -353,22 +347,16 @@ def fpimm0neg : PatLeaf<(fpimm), [{
}]>;
def : Pat<(f32 fpimm0), (MTC1 ZERO)>;
-def : Pat<(f32 fpimm0neg), (FNEG_S32 (MTC1 ZERO))>;
+def : Pat<(f32 fpimm0neg), (FNEG_S (MTC1 ZERO))>;
-def : Pat<(f32 (sint_to_fp CPURegs:$src)), (CVTS_W32 (MTC1 CPURegs:$src))>;
-def : Pat<(f64 (sint_to_fp CPURegs:$src)), (CVTD_W32 (MTC1 CPURegs:$src))>;
+def : Pat<(f32 (sint_to_fp CPURegs:$src)), (CVT_S_W (MTC1 CPURegs:$src))>;
+def : Pat<(f64 (sint_to_fp CPURegs:$src)), (CVT_D32_W (MTC1 CPURegs:$src))>;
-def : Pat<(i32 (fp_to_sint FGR32:$src)), (MFC1 (TRUNC_W_S32 FGR32:$src))>;
+def : Pat<(i32 (fp_to_sint FGR32:$src)), (MFC1 (TRUNC_W_S FGR32:$src))>;
def : Pat<(i32 (fp_to_sint AFGR64:$src)), (MFC1 (TRUNC_W_D32 AFGR64:$src))>;
-def : Pat<(i32 (bitconvert FGR32:$src)), (MFC1 FGR32:$src)>;
-def : Pat<(f32 (bitconvert CPURegs:$src)), (MTC1 CPURegs:$src)>;
-
-let Predicates = [In32BitMode] in {
- def : Pat<(f32 (fround AFGR64:$src)), (CVTS_D32 AFGR64:$src)>;
- def : Pat<(f64 (fextend FGR32:$src)), (CVTD_S32 FGR32:$src)>;
+let Predicates = [NotFP64bit] in {
+ def : Pat<(f32 (fround AFGR64:$src)), (CVT_S_D32 AFGR64:$src)>;
+ def : Pat<(f64 (fextend FGR32:$src)), (CVT_D32_S FGR32:$src)>;
}
-// MipsFPRound is only emitted for MipsI targets.
-def : Pat<(f32 (MipsFPRound AFGR64:$src)), (CVTW_D32 AFGR64:$src)>;
-
diff --git a/contrib/llvm/lib/Target/Mips/MipsInstrFormats.td b/contrib/llvm/lib/Target/Mips/MipsInstrFormats.td
index 9f55fb3..d246a26 100644
--- a/contrib/llvm/lib/Target/Mips/MipsInstrFormats.td
+++ b/contrib/llvm/lib/Target/Mips/MipsInstrFormats.td
@@ -44,7 +44,9 @@ class MipsInst<dag outs, dag ins, string asmstr, list<dag> pattern,
// Mips Pseudo Instructions Format
class MipsPseudo<dag outs, dag ins, string asmstr, list<dag> pattern>:
- MipsInst<outs, ins, asmstr, pattern, IIPseudo>;
+ MipsInst<outs, ins, asmstr, pattern, IIPseudo> {
+ let isPseudo = 1;
+}
//===----------------------------------------------------------------------===//
// Format R instruction class in Mips : <|opcode|rs|rt|rd|shamt|funct|>
@@ -88,6 +90,21 @@ class FI<bits<6> op, dag outs, dag ins, string asmstr, list<dag> pattern,
let Inst{15-0} = imm16;
}
+class CBranchBase<bits<6> op, dag outs, dag ins, string asmstr,
+ list<dag> pattern, InstrItinClass itin>:
+ MipsInst<outs, ins, asmstr, pattern, itin>
+{
+ bits<5> rs;
+ bits<5> rt;
+ bits<16> imm16;
+
+ let opcode = op;
+
+ let Inst{25-21} = rs;
+ let Inst{20-16} = rt;
+ let Inst{15-0} = imm16;
+}
+
//===----------------------------------------------------------------------===//
// Format J instruction class in Mips : <|opcode|address|>
//===----------------------------------------------------------------------===//
@@ -224,4 +241,27 @@ class FFCMOV<bits<5> _fmt, bits<1> _tf, dag outs, dag ins, string asmstr,
let Inst{15-11} = fs;
let Inst{10-6} = fd;
let Inst{5-0} = 17;
-} \ No newline at end of file
+}
+
+// FP unary instructions without patterns.
+class FFR1<bits<6> funct, bits<5> fmt, string opstr, string fmtstr,
+ RegisterClass DstRC, RegisterClass SrcRC> :
+ FFR<0x11, funct, fmt, (outs DstRC:$fd), (ins SrcRC:$fs),
+ !strconcat(opstr, ".", fmtstr, "\t$fd, $fs"), []> {
+ let ft = 0;
+}
+
+// FP unary instructions with patterns.
+class FFR1P<bits<6> funct, bits<5> fmt, string opstr, string fmtstr,
+ RegisterClass DstRC, RegisterClass SrcRC, SDNode OpNode> :
+ FFR<0x11, funct, fmt, (outs DstRC:$fd), (ins SrcRC:$fs),
+ !strconcat(opstr, ".", fmtstr, "\t$fd, $fs"),
+ [(set DstRC:$fd, (OpNode SrcRC:$fs))]> {
+ let ft = 0;
+}
+
+class FFR2P<bits<6> funct, bits<5> fmt, string opstr,
+ string fmtstr, RegisterClass RC, SDNode OpNode> :
+ FFR<0x11, funct, fmt, (outs RC:$fd), (ins RC:$fs, RC:$ft),
+ !strconcat(opstr, ".", fmtstr, "\t$fd, $fs, $ft"),
+ [(set RC:$fd, (OpNode RC:$fs, RC:$ft))]>;
diff --git a/contrib/llvm/lib/Target/Mips/MipsInstrInfo.cpp b/contrib/llvm/lib/Target/Mips/MipsInstrInfo.cpp
index 0a7a7f2..559943a 100644
--- a/contrib/llvm/lib/Target/Mips/MipsInstrInfo.cpp
+++ b/contrib/llvm/lib/Target/Mips/MipsInstrInfo.cpp
@@ -17,8 +17,8 @@
#include "InstPrinter/MipsInstPrinter.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
-#include "llvm/Target/TargetRegistry.h"
#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/TargetRegistry.h"
#include "llvm/ADT/STLExtras.h"
#define GET_INSTRINFO_CTOR
@@ -28,7 +28,8 @@ using namespace llvm;
MipsInstrInfo::MipsInstrInfo(MipsTargetMachine &tm)
: MipsGenInstrInfo(Mips::ADJCALLSTACKDOWN, Mips::ADJCALLSTACKUP),
- TM(tm), RI(*TM.getSubtargetImpl(), *this) {}
+ TM(tm), IsN64(TM.getSubtarget<MipsSubtarget>().isABI_N64()),
+ RI(*TM.getSubtargetImpl(), *this) {}
const MipsRegisterInfo &MipsInstrInfo::getRegisterInfo() const {
@@ -47,8 +48,12 @@ static bool isZeroImm(const MachineOperand &op) {
unsigned MipsInstrInfo::
isLoadFromStackSlot(const MachineInstr *MI, int &FrameIndex) const
{
- if ((MI->getOpcode() == Mips::LW) || (MI->getOpcode() == Mips::LWC1) ||
- (MI->getOpcode() == Mips::LDC1)) {
+ unsigned Opc = MI->getOpcode();
+
+ if ((Opc == Mips::LW) || (Opc == Mips::LW_P8) || (Opc == Mips::LD) ||
+ (Opc == Mips::LD_P8) || (Opc == Mips::LWC1) || (Opc == Mips::LWC1_P8) ||
+ (Opc == Mips::LDC1) || (Opc == Mips::LDC164) ||
+ (Opc == Mips::LDC164_P8)) {
if ((MI->getOperand(1).isFI()) && // is a stack slot
(MI->getOperand(2).isImm()) && // the imm is zero
(isZeroImm(MI->getOperand(2)))) {
@@ -68,8 +73,12 @@ isLoadFromStackSlot(const MachineInstr *MI, int &FrameIndex) const
unsigned MipsInstrInfo::
isStoreToStackSlot(const MachineInstr *MI, int &FrameIndex) const
{
- if ((MI->getOpcode() == Mips::SW) || (MI->getOpcode() == Mips::SWC1) ||
- (MI->getOpcode() == Mips::SDC1)) {
+ unsigned Opc = MI->getOpcode();
+
+ if ((Opc == Mips::SW) || (Opc == Mips::SW_P8) || (Opc == Mips::SD) ||
+ (Opc == Mips::SD_P8) || (Opc == Mips::SWC1) || (Opc == Mips::SWC1_P8) ||
+ (Opc == Mips::SDC1) || (Opc == Mips::SDC164) ||
+ (Opc == Mips::SDC164_P8)) {
if ((MI->getOperand(1).isFI()) && // is a stack slot
(MI->getOperand(2).isImm()) && // the imm is zero
(isZeroImm(MI->getOperand(2)))) {
@@ -94,70 +103,63 @@ copyPhysReg(MachineBasicBlock &MBB,
MachineBasicBlock::iterator I, DebugLoc DL,
unsigned DestReg, unsigned SrcReg,
bool KillSrc) const {
- bool DestCPU = Mips::CPURegsRegClass.contains(DestReg);
- bool SrcCPU = Mips::CPURegsRegClass.contains(SrcReg);
-
- // CPU-CPU is the most common.
- if (DestCPU && SrcCPU) {
- BuildMI(MBB, I, DL, get(Mips::ADDu), DestReg).addReg(Mips::ZERO)
- .addReg(SrcReg, getKillRegState(KillSrc));
- return;
- }
+ unsigned Opc = 0, ZeroReg = 0;
- // Copy to CPU from other registers.
- if (DestCPU) {
- if (Mips::CCRRegClass.contains(SrcReg))
- BuildMI(MBB, I, DL, get(Mips::CFC1), DestReg)
- .addReg(SrcReg, getKillRegState(KillSrc));
+ if (Mips::CPURegsRegClass.contains(DestReg)) { // Copy to CPU Reg.
+ if (Mips::CPURegsRegClass.contains(SrcReg))
+ Opc = Mips::ADDu, ZeroReg = Mips::ZERO;
+ else if (Mips::CCRRegClass.contains(SrcReg))
+ Opc = Mips::CFC1;
else if (Mips::FGR32RegClass.contains(SrcReg))
- BuildMI(MBB, I, DL, get(Mips::MFC1), DestReg)
- .addReg(SrcReg, getKillRegState(KillSrc));
+ Opc = Mips::MFC1;
else if (SrcReg == Mips::HI)
- BuildMI(MBB, I, DL, get(Mips::MFHI), DestReg);
+ Opc = Mips::MFHI, SrcReg = 0;
else if (SrcReg == Mips::LO)
- BuildMI(MBB, I, DL, get(Mips::MFLO), DestReg);
- else
- llvm_unreachable("Copy to CPU from invalid register");
- return;
+ Opc = Mips::MFLO, SrcReg = 0;
}
-
- // Copy to other registers from CPU.
- if (SrcCPU) {
+ else if (Mips::CPURegsRegClass.contains(SrcReg)) { // Copy from CPU Reg.
if (Mips::CCRRegClass.contains(DestReg))
- BuildMI(MBB, I, DL, get(Mips::CTC1), DestReg)
- .addReg(SrcReg, getKillRegState(KillSrc));
+ Opc = Mips::CTC1;
else if (Mips::FGR32RegClass.contains(DestReg))
- BuildMI(MBB, I, DL, get(Mips::MTC1), DestReg)
- .addReg(SrcReg, getKillRegState(KillSrc));
+ Opc = Mips::MTC1;
else if (DestReg == Mips::HI)
- BuildMI(MBB, I, DL, get(Mips::MTHI))
- .addReg(SrcReg, getKillRegState(KillSrc));
+ Opc = Mips::MTHI, DestReg = 0;
else if (DestReg == Mips::LO)
- BuildMI(MBB, I, DL, get(Mips::MTLO))
- .addReg(SrcReg, getKillRegState(KillSrc));
- else
- llvm_unreachable("Copy from CPU to invalid register");
- return;
+ Opc = Mips::MTLO, DestReg = 0;
}
-
- if (Mips::FGR32RegClass.contains(DestReg, SrcReg)) {
- BuildMI(MBB, I, DL, get(Mips::FMOV_S32), DestReg)
- .addReg(SrcReg, getKillRegState(KillSrc));
- return;
+ else if (Mips::FGR32RegClass.contains(DestReg, SrcReg))
+ Opc = Mips::FMOV_S;
+ else if (Mips::AFGR64RegClass.contains(DestReg, SrcReg))
+ Opc = Mips::FMOV_D32;
+ else if (Mips::CCRRegClass.contains(DestReg, SrcReg))
+ Opc = Mips::MOVCCRToCCR;
+ else if (Mips::CPU64RegsRegClass.contains(DestReg)) { // Copy to CPU64 Reg.
+ if (Mips::CPU64RegsRegClass.contains(SrcReg))
+ Opc = Mips::DADDu, ZeroReg = Mips::ZERO_64;
+ else if (SrcReg == Mips::HI64)
+ Opc = Mips::MFHI64, SrcReg = 0;
+ else if (SrcReg == Mips::LO64)
+ Opc = Mips::MFLO64, SrcReg = 0;
}
-
- if (Mips::AFGR64RegClass.contains(DestReg, SrcReg)) {
- BuildMI(MBB, I, DL, get(Mips::FMOV_D32), DestReg)
- .addReg(SrcReg, getKillRegState(KillSrc));
- return;
+ else if (Mips::CPU64RegsRegClass.contains(SrcReg)) { // Copy from CPU64 Reg.
+ if (DestReg == Mips::HI64)
+ Opc = Mips::MTHI64, DestReg = 0;
+ else if (DestReg == Mips::LO64)
+ Opc = Mips::MTLO64, DestReg = 0;
}
- if (Mips::CCRRegClass.contains(DestReg, SrcReg)) {
- BuildMI(MBB, I, DL, get(Mips::MOVCCRToCCR), DestReg)
- .addReg(SrcReg, getKillRegState(KillSrc));
- return;
- }
- llvm_unreachable("Cannot copy registers");
+ assert(Opc && "Cannot copy registers");
+
+ MachineInstrBuilder MIB = BuildMI(MBB, I, DL, get(Opc));
+
+ if (DestReg)
+ MIB.addReg(DestReg, RegState::Define);
+
+ if (ZeroReg)
+ MIB.addReg(ZeroReg);
+
+ if (SrcReg)
+ MIB.addReg(SrcReg, getKillRegState(KillSrc));
}
void MipsInstrInfo::
@@ -167,31 +169,22 @@ storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
const TargetRegisterInfo *TRI) const {
DebugLoc DL;
if (I != MBB.end()) DL = I->getDebugLoc();
+ unsigned Opc = 0;
if (RC == Mips::CPURegsRegisterClass)
- BuildMI(MBB, I, DL, get(Mips::SW)).addReg(SrcReg, getKillRegState(isKill))
- .addFrameIndex(FI).addImm(0);
+ Opc = IsN64 ? Mips::SW_P8 : Mips::SW;
+ else if (RC == Mips::CPU64RegsRegisterClass)
+ Opc = IsN64 ? Mips::SD_P8 : Mips::SD;
else if (RC == Mips::FGR32RegisterClass)
- BuildMI(MBB, I, DL, get(Mips::SWC1)).addReg(SrcReg, getKillRegState(isKill))
- .addFrameIndex(FI).addImm(0);
- else if (RC == Mips::AFGR64RegisterClass) {
- if (!TM.getSubtarget<MipsSubtarget>().isMips1()) {
- BuildMI(MBB, I, DL, get(Mips::SDC1))
- .addReg(SrcReg, getKillRegState(isKill))
- .addFrameIndex(FI).addImm(0);
- } else {
- const TargetRegisterInfo *TRI =
- MBB.getParent()->getTarget().getRegisterInfo();
- const unsigned *SubSet = TRI->getSubRegisters(SrcReg);
- BuildMI(MBB, I, DL, get(Mips::SWC1))
- .addReg(SubSet[0], getKillRegState(isKill))
- .addFrameIndex(FI).addImm(0);
- BuildMI(MBB, I, DL, get(Mips::SWC1))
- .addReg(SubSet[1], getKillRegState(isKill))
- .addFrameIndex(FI).addImm(4);
- }
- } else
- llvm_unreachable("Register class not handled!");
+ Opc = IsN64 ? Mips::SWC1_P8 : Mips::SWC1;
+ else if (RC == Mips::AFGR64RegisterClass)
+ Opc = Mips::SDC1;
+ else if (RC == Mips::FGR64RegisterClass)
+ Opc = IsN64 ? Mips::SDC164_P8 : Mips::SDC164;
+
+ assert(Opc && "Register class not handled!");
+ BuildMI(MBB, I, DL, get(Opc)).addReg(SrcReg, getKillRegState(isKill))
+ .addFrameIndex(FI).addImm(0);
}
void MipsInstrInfo::
@@ -202,25 +195,21 @@ loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
{
DebugLoc DL;
if (I != MBB.end()) DL = I->getDebugLoc();
+ unsigned Opc = 0;
if (RC == Mips::CPURegsRegisterClass)
- BuildMI(MBB, I, DL, get(Mips::LW), DestReg).addFrameIndex(FI).addImm(0);
+ Opc = IsN64 ? Mips::LW_P8 : Mips::LW;
+ else if (RC == Mips::CPU64RegsRegisterClass)
+ Opc = IsN64 ? Mips::LD_P8 : Mips::LD;
else if (RC == Mips::FGR32RegisterClass)
- BuildMI(MBB, I, DL, get(Mips::LWC1), DestReg).addFrameIndex(FI).addImm(0);
- else if (RC == Mips::AFGR64RegisterClass) {
- if (!TM.getSubtarget<MipsSubtarget>().isMips1()) {
- BuildMI(MBB, I, DL, get(Mips::LDC1), DestReg).addFrameIndex(FI).addImm(0);
- } else {
- const TargetRegisterInfo *TRI =
- MBB.getParent()->getTarget().getRegisterInfo();
- const unsigned *SubSet = TRI->getSubRegisters(DestReg);
- BuildMI(MBB, I, DL, get(Mips::LWC1), SubSet[0])
- .addFrameIndex(FI).addImm(0);
- BuildMI(MBB, I, DL, get(Mips::LWC1), SubSet[1])
- .addFrameIndex(FI).addImm(4);
- }
- } else
- llvm_unreachable("Register class not handled!");
+ Opc = IsN64 ? Mips::LWC1_P8 : Mips::LWC1;
+ else if (RC == Mips::AFGR64RegisterClass)
+ Opc = Mips::LDC1;
+ else if (RC == Mips::FGR64RegisterClass)
+ Opc = IsN64 ? Mips::LDC164_P8 : Mips::LDC164;
+
+ assert(Opc && "Register class not handled!");
+ BuildMI(MBB, I, DL, get(Opc), DestReg).addFrameIndex(FI).addImm(0);
}
MachineInstr*
@@ -237,9 +226,12 @@ MipsInstrInfo::emitFrameIndexDebugValue(MachineFunction &MF, int FrameIx,
//===----------------------------------------------------------------------===//
static unsigned GetAnalyzableBrOpc(unsigned Opc) {
- return (Opc == Mips::BEQ || Opc == Mips::BNE || Opc == Mips::BGTZ ||
- Opc == Mips::BGEZ || Opc == Mips::BLTZ || Opc == Mips::BLEZ ||
- Opc == Mips::BC1T || Opc == Mips::BC1F || Opc == Mips::J) ? Opc : 0;
+ return (Opc == Mips::BEQ || Opc == Mips::BNE || Opc == Mips::BGTZ ||
+ Opc == Mips::BGEZ || Opc == Mips::BLTZ || Opc == Mips::BLEZ ||
+ Opc == Mips::BEQ64 || Opc == Mips::BNE64 || Opc == Mips::BGTZ64 ||
+ Opc == Mips::BGEZ64 || Opc == Mips::BLTZ64 || Opc == Mips::BLEZ64 ||
+ Opc == Mips::BC1T || Opc == Mips::BC1F || Opc == Mips::J) ?
+ Opc : 0;
}
/// GetOppositeBranchOpc - Return the inverse of the specified
@@ -248,14 +240,20 @@ unsigned Mips::GetOppositeBranchOpc(unsigned Opc)
{
switch (Opc) {
default: llvm_unreachable("Illegal opcode!");
- case Mips::BEQ : return Mips::BNE;
- case Mips::BNE : return Mips::BEQ;
- case Mips::BGTZ : return Mips::BLEZ;
- case Mips::BGEZ : return Mips::BLTZ;
- case Mips::BLTZ : return Mips::BGEZ;
- case Mips::BLEZ : return Mips::BGTZ;
- case Mips::BC1T : return Mips::BC1F;
- case Mips::BC1F : return Mips::BC1T;
+ case Mips::BEQ : return Mips::BNE;
+ case Mips::BNE : return Mips::BEQ;
+ case Mips::BGTZ : return Mips::BLEZ;
+ case Mips::BGEZ : return Mips::BLTZ;
+ case Mips::BLTZ : return Mips::BGEZ;
+ case Mips::BLEZ : return Mips::BGTZ;
+ case Mips::BEQ64 : return Mips::BNE64;
+ case Mips::BNE64 : return Mips::BEQ64;
+ case Mips::BGTZ64 : return Mips::BLEZ64;
+ case Mips::BGEZ64 : return Mips::BLTZ64;
+ case Mips::BLTZ64 : return Mips::BGEZ64;
+ case Mips::BLEZ64 : return Mips::BGTZ64;
+ case Mips::BC1T : return Mips::BC1F;
+ case Mips::BC1F : return Mips::BC1T;
}
}
diff --git a/contrib/llvm/lib/Target/Mips/MipsInstrInfo.h b/contrib/llvm/lib/Target/Mips/MipsInstrInfo.h
index 4421c48..271d248 100644
--- a/contrib/llvm/lib/Target/Mips/MipsInstrInfo.h
+++ b/contrib/llvm/lib/Target/Mips/MipsInstrInfo.h
@@ -72,12 +72,47 @@ namespace MipsII {
/// MO_TPREL_HI/LO - Represents the hi and low part of the offset from
// the thread pointer (Local Exec TLS).
MO_TPREL_HI,
- MO_TPREL_LO
+ MO_TPREL_LO,
+
+ // N32/64 Flags.
+ MO_GPOFF_HI,
+ MO_GPOFF_LO,
+ MO_GOT_DISP,
+ MO_GOT_PAGE,
+ MO_GOT_OFST
+ };
+
+ enum {
+ //===------------------------------------------------------------------===//
+ // Instruction encodings. These are the standard/most common forms for
+ // Mips instructions.
+ //
+
+ // Pseudo - This represents an instruction that is a pseudo instruction
+ // or one that has not been implemented yet. It is illegal to code generate
+ // it, but tolerated for intermediate implementation stages.
+ Pseudo = 0,
+
+ /// FrmR - This form is for instructions of the format R.
+ FrmR = 1,
+ /// FrmI - This form is for instructions of the format I.
+ FrmI = 2,
+ /// FrmJ - This form is for instructions of the format J.
+ FrmJ = 3,
+ /// FrmFR - This form is for instructions of the format FR.
+ FrmFR = 4,
+ /// FrmFI - This form is for instructions of the format FI.
+ FrmFI = 5,
+ /// FrmOther - This form is for instructions that have no specific format.
+ FrmOther = 6,
+
+ FormMask = 15
};
}
class MipsInstrInfo : public MipsGenInstrInfo {
MipsTargetMachine &TM;
+ bool IsN64;
const MipsRegisterInfo RI;
public:
explicit MipsInstrInfo(MipsTargetMachine &TM);
diff --git a/contrib/llvm/lib/Target/Mips/MipsInstrInfo.td b/contrib/llvm/lib/Target/Mips/MipsInstrInfo.td
index d1a0587..06b7de7 100644
--- a/contrib/llvm/lib/Target/Mips/MipsInstrInfo.td
+++ b/contrib/llvm/lib/Target/Mips/MipsInstrInfo.td
@@ -34,13 +34,20 @@ def SDT_MipsMAddMSub : SDTypeProfile<0, 4,
SDTCisSameAs<1, 2>,
SDTCisSameAs<2, 3>]>;
def SDT_MipsDivRem : SDTypeProfile<0, 2,
- [SDTCisVT<0, i32>,
+ [SDTCisInt<0>,
SDTCisSameAs<0, 1>]>;
def SDT_MipsThreadPointer : SDTypeProfile<1, 0, [SDTCisPtrTy<0>]>;
def SDT_MipsDynAlloc : SDTypeProfile<1, 1, [SDTCisVT<0, i32>,
SDTCisVT<1, iPTR>]>;
+def SDT_Sync : SDTypeProfile<0, 1, [SDTCisVT<0, i32>]>;
+
+def SDT_Ext : SDTypeProfile<1, 3, [SDTCisInt<0>, SDTCisSameAs<0, 1>,
+ SDTCisVT<2, i32>, SDTCisSameAs<2, 3>]>;
+def SDT_Ins : SDTypeProfile<1, 4, [SDTCisInt<0>, SDTCisSameAs<0, 1>,
+ SDTCisVT<2, i32>, SDTCisSameAs<2, 3>,
+ SDTCisSameAs<0, 4>]>;
// Call
def MipsJmpLink : SDNode<"MipsISD::JmpLink",SDT_MipsJmpLink,
@@ -106,6 +113,11 @@ def MipsWrapperPIC : SDNode<"MipsISD::WrapperPIC", SDTIntUnaryOp>;
def MipsDynAlloc : SDNode<"MipsISD::DynAlloc", SDT_MipsDynAlloc,
[SDNPHasChain, SDNPInGlue]>;
+def MipsSync : SDNode<"MipsISD::Sync", SDT_Sync, [SDNPHasChain]>;
+
+def MipsExt : SDNode<"MipsISD::Ext", SDT_Ext>;
+def MipsIns : SDNode<"MipsISD::Ins", SDT_Ins>;
+
//===----------------------------------------------------------------------===//
// Mips Instruction Predicate Definitions.
//===----------------------------------------------------------------------===//
@@ -113,8 +125,13 @@ def HasSEInReg : Predicate<"Subtarget.hasSEInReg()">;
def HasBitCount : Predicate<"Subtarget.hasBitCount()">;
def HasSwap : Predicate<"Subtarget.hasSwap()">;
def HasCondMov : Predicate<"Subtarget.hasCondMov()">;
-def IsMips32 : Predicate<"Subtarget.isMips32()">;
-def IsMips32r2 : Predicate<"Subtarget.isMips32r2()">;
+def HasMips32 : Predicate<"Subtarget.hasMips32()">;
+def HasMips32r2 : Predicate<"Subtarget.hasMips32r2()">;
+def HasMips64 : Predicate<"Subtarget.hasMips64()">;
+def NotMips64 : Predicate<"!Subtarget.hasMips64()">;
+def HasMips64r2 : Predicate<"Subtarget.hasMips64r2()">;
+def IsN64 : Predicate<"Subtarget.isABI_N64()">;
+def NotN64 : Predicate<"!Subtarget.isABI_N64()">;
//===----------------------------------------------------------------------===//
// Mips Operand, Complex Patterns and Transformations Definitions.
@@ -124,6 +141,7 @@ def IsMips32r2 : Predicate<"Subtarget.isMips32r2()">;
def brtarget : Operand<OtherVT>;
def calltarget : Operand<i32>;
def simm16 : Operand<i32>;
+def simm16_64 : Operand<i64>;
def shamt : Operand<i32>;
// Unsigned Operand
@@ -137,6 +155,11 @@ def mem : Operand<i32> {
let MIOperandInfo = (ops CPURegs, simm16);
}
+def mem64 : Operand<i64> {
+ let PrintMethod = "printMemOperand";
+ let MIOperandInfo = (ops CPU64Regs, simm16_64);
+}
+
def mem_ea : Operand<i32> {
let PrintMethod = "printMemOperandEA";
let MIOperandInfo = (ops CPURegs, simm16);
@@ -177,36 +200,85 @@ def immZExt5 : PatLeaf<(imm), [{
def addr : ComplexPattern<iPTR, 2, "SelectAddr", [frameindex], []>;
//===----------------------------------------------------------------------===//
+// Pattern fragment for load/store
+//===----------------------------------------------------------------------===//
+class UnalignedLoad<PatFrag Node> : PatFrag<(ops node:$ptr), (Node node:$ptr), [{
+ LoadSDNode *LD = cast<LoadSDNode>(N);
+ return LD->getMemoryVT().getSizeInBits()/8 > LD->getAlignment();
+}]>;
+
+class AlignedLoad<PatFrag Node> : PatFrag<(ops node:$ptr), (Node node:$ptr), [{
+ LoadSDNode *LD = cast<LoadSDNode>(N);
+ return LD->getMemoryVT().getSizeInBits()/8 <= LD->getAlignment();
+}]>;
+
+class UnalignedStore<PatFrag Node> : PatFrag<(ops node:$val, node:$ptr),
+ (Node node:$val, node:$ptr), [{
+ StoreSDNode *SD = cast<StoreSDNode>(N);
+ return SD->getMemoryVT().getSizeInBits()/8 > SD->getAlignment();
+}]>;
+
+class AlignedStore<PatFrag Node> : PatFrag<(ops node:$val, node:$ptr),
+ (Node node:$val, node:$ptr), [{
+ StoreSDNode *SD = cast<StoreSDNode>(N);
+ return SD->getMemoryVT().getSizeInBits()/8 <= SD->getAlignment();
+}]>;
+
+// Load/Store PatFrags.
+def sextloadi16_a : AlignedLoad<sextloadi16>;
+def zextloadi16_a : AlignedLoad<zextloadi16>;
+def extloadi16_a : AlignedLoad<extloadi16>;
+def load_a : AlignedLoad<load>;
+def sextloadi32_a : AlignedLoad<sextloadi32>;
+def zextloadi32_a : AlignedLoad<zextloadi32>;
+def extloadi32_a : AlignedLoad<extloadi32>;
+def truncstorei16_a : AlignedStore<truncstorei16>;
+def store_a : AlignedStore<store>;
+def truncstorei32_a : AlignedStore<truncstorei32>;
+def sextloadi16_u : UnalignedLoad<sextloadi16>;
+def zextloadi16_u : UnalignedLoad<zextloadi16>;
+def extloadi16_u : UnalignedLoad<extloadi16>;
+def load_u : UnalignedLoad<load>;
+def sextloadi32_u : UnalignedLoad<sextloadi32>;
+def zextloadi32_u : UnalignedLoad<zextloadi32>;
+def extloadi32_u : UnalignedLoad<extloadi32>;
+def truncstorei16_u : UnalignedStore<truncstorei16>;
+def store_u : UnalignedStore<store>;
+def truncstorei32_u : UnalignedStore<truncstorei32>;
+
+//===----------------------------------------------------------------------===//
// Instructions specific format
//===----------------------------------------------------------------------===//
-// Arithmetic 3 register operands
-class ArithR<bits<6> op, bits<6> func, string instr_asm, SDNode OpNode,
- InstrItinClass itin, bit isComm = 0>:
- FR<op, func, (outs CPURegs:$dst), (ins CPURegs:$b, CPURegs:$c),
- !strconcat(instr_asm, "\t$dst, $b, $c"),
- [(set CPURegs:$dst, (OpNode CPURegs:$b, CPURegs:$c))], itin> {
+// Arithmetic and logical instructions with 3 register operands.
+class ArithLogicR<bits<6> op, bits<6> func, string instr_asm, SDNode OpNode,
+ InstrItinClass itin, RegisterClass RC, bit isComm = 0>:
+ FR<op, func, (outs RC:$rd), (ins RC:$rs, RC:$rt),
+ !strconcat(instr_asm, "\t$rd, $rs, $rt"),
+ [(set RC:$rd, (OpNode RC:$rs, RC:$rt))], itin> {
+ let shamt = 0;
let isCommutable = isComm;
}
class ArithOverflowR<bits<6> op, bits<6> func, string instr_asm,
- bit isComm = 0>:
- FR<op, func, (outs CPURegs:$dst), (ins CPURegs:$b, CPURegs:$c),
- !strconcat(instr_asm, "\t$dst, $b, $c"), [], IIAlu> {
+ InstrItinClass itin, RegisterClass RC, bit isComm = 0>:
+ FR<op, func, (outs RC:$rd), (ins RC:$rs, RC:$rt),
+ !strconcat(instr_asm, "\t$rd, $rs, $rt"), [], itin> {
+ let shamt = 0;
let isCommutable = isComm;
}
-// Arithmetic 2 register operands
-class ArithI<bits<6> op, string instr_asm, SDNode OpNode,
- Operand Od, PatLeaf imm_type> :
- FI<op, (outs CPURegs:$dst), (ins CPURegs:$b, Od:$c),
- !strconcat(instr_asm, "\t$dst, $b, $c"),
- [(set CPURegs:$dst, (OpNode CPURegs:$b, imm_type:$c))], IIAlu>;
+// Arithmetic and logical instructions with 2 register operands.
+class ArithLogicI<bits<6> op, string instr_asm, SDNode OpNode,
+ Operand Od, PatLeaf imm_type, RegisterClass RC> :
+ FI<op, (outs RC:$rt), (ins RC:$rs, Od:$i),
+ !strconcat(instr_asm, "\t$rt, $rs, $i"),
+ [(set RC:$rt, (OpNode RC:$rs, imm_type:$i))], IIAlu>;
class ArithOverflowI<bits<6> op, string instr_asm, SDNode OpNode,
- Operand Od, PatLeaf imm_type> :
- FI<op, (outs CPURegs:$dst), (ins CPURegs:$b, Od:$c),
- !strconcat(instr_asm, "\t$dst, $b, $c"), [], IIAlu>;
+ Operand Od, PatLeaf imm_type, RegisterClass RC> :
+ FI<op, (outs RC:$rt), (ins RC:$rs, Od:$i),
+ !strconcat(instr_asm, "\t$rt, $rs, $i"), [], IIAlu>;
// Arithmetic Multiply ADD/SUB
let rd = 0, shamt = 0, Defs = [HI, LO], Uses = [HI, LO] in
@@ -214,92 +286,134 @@ class MArithR<bits<6> func, string instr_asm, SDNode op, bit isComm = 0> :
FR<0x1c, func, (outs), (ins CPURegs:$rs, CPURegs:$rt),
!strconcat(instr_asm, "\t$rs, $rt"),
[(op CPURegs:$rs, CPURegs:$rt, LO, HI)], IIImul> {
+ let rd = 0;
+ let shamt = 0;
let isCommutable = isComm;
}
// Logical
-let isCommutable = 1 in
-class LogicR<bits<6> func, string instr_asm, SDNode OpNode>:
- FR<0x00, func, (outs CPURegs:$dst), (ins CPURegs:$b, CPURegs:$c),
- !strconcat(instr_asm, "\t$dst, $b, $c"),
- [(set CPURegs:$dst, (OpNode CPURegs:$b, CPURegs:$c))], IIAlu>;
-
-class LogicI<bits<6> op, string instr_asm, SDNode OpNode>:
- FI<op, (outs CPURegs:$dst), (ins CPURegs:$b, uimm16:$c),
- !strconcat(instr_asm, "\t$dst, $b, $c"),
- [(set CPURegs:$dst, (OpNode CPURegs:$b, immZExt16:$c))], IIAlu>;
-
-let isCommutable = 1 in
-class LogicNOR<bits<6> op, bits<6> func, string instr_asm>:
- FR<op, func, (outs CPURegs:$dst), (ins CPURegs:$b, CPURegs:$c),
- !strconcat(instr_asm, "\t$dst, $b, $c"),
- [(set CPURegs:$dst, (not (or CPURegs:$b, CPURegs:$c)))], IIAlu>;
+class LogicNOR<bits<6> op, bits<6> func, string instr_asm, RegisterClass RC>:
+ FR<op, func, (outs RC:$rd), (ins RC:$rs, RC:$rt),
+ !strconcat(instr_asm, "\t$rd, $rs, $rt"),
+ [(set RC:$rd, (not (or RC:$rs, RC:$rt)))], IIAlu> {
+ let shamt = 0;
+ let isCommutable = 1;
+}
// Shifts
class LogicR_shift_rotate_imm<bits<6> func, bits<5> _rs, string instr_asm,
SDNode OpNode>:
- FR<0x00, func, (outs CPURegs:$dst), (ins CPURegs:$b, shamt:$c),
- !strconcat(instr_asm, "\t$dst, $b, $c"),
- [(set CPURegs:$dst, (OpNode CPURegs:$b, immZExt5:$c))], IIAlu> {
+ FR<0x00, func, (outs CPURegs:$rd), (ins CPURegs:$rt, shamt:$shamt),
+ !strconcat(instr_asm, "\t$rd, $rt, $shamt"),
+ [(set CPURegs:$rd, (OpNode CPURegs:$rt, (i32 immZExt5:$shamt)))], IIAlu> {
let rs = _rs;
}
-class LogicR_shift_rotate_reg<bits<6> func, bits<5> _shamt, string instr_asm,
+class LogicR_shift_rotate_reg<bits<6> func, bits<5> isRotate, string instr_asm,
SDNode OpNode>:
- FR<0x00, func, (outs CPURegs:$dst), (ins CPURegs:$c, CPURegs:$b),
- !strconcat(instr_asm, "\t$dst, $b, $c"),
- [(set CPURegs:$dst, (OpNode CPURegs:$b, CPURegs:$c))], IIAlu> {
- let shamt = _shamt;
+ FR<0x00, func, (outs CPURegs:$rd), (ins CPURegs:$rs, CPURegs:$rt),
+ !strconcat(instr_asm, "\t$rd, $rt, $rs"),
+ [(set CPURegs:$rd, (OpNode CPURegs:$rt, CPURegs:$rs))], IIAlu> {
+ let shamt = isRotate;
}
// Load Upper Imediate
class LoadUpper<bits<6> op, string instr_asm>:
- FI< op,
- (outs CPURegs:$dst),
- (ins uimm16:$imm),
- !strconcat(instr_asm, "\t$dst, $imm"),
- [], IIAlu>;
+ FI<op, (outs CPURegs:$rt), (ins uimm16:$imm),
+ !strconcat(instr_asm, "\t$rt, $imm"), [], IIAlu> {
+ let rs = 0;
+}
// Memory Load/Store
-let canFoldAsLoad = 1, hasDelaySlot = 1 in
-class LoadM<bits<6> op, string instr_asm, PatFrag OpNode>:
- FI<op, (outs CPURegs:$dst), (ins mem:$addr),
- !strconcat(instr_asm, "\t$dst, $addr"),
- [(set CPURegs:$dst, (OpNode addr:$addr))], IILoad>;
+let canFoldAsLoad = 1 in
+class LoadM<bits<6> op, string instr_asm, PatFrag OpNode, RegisterClass RC,
+ Operand MemOpnd, bit Pseudo>:
+ FI<op, (outs RC:$rt), (ins MemOpnd:$addr),
+ !strconcat(instr_asm, "\t$rt, $addr"),
+ [(set RC:$rt, (OpNode addr:$addr))], IILoad> {
+ let isPseudo = Pseudo;
+}
-class StoreM<bits<6> op, string instr_asm, PatFrag OpNode>:
- FI<op, (outs), (ins CPURegs:$dst, mem:$addr),
- !strconcat(instr_asm, "\t$dst, $addr"),
- [(OpNode CPURegs:$dst, addr:$addr)], IIStore>;
+class StoreM<bits<6> op, string instr_asm, PatFrag OpNode, RegisterClass RC,
+ Operand MemOpnd, bit Pseudo>:
+ FI<op, (outs), (ins RC:$rt, MemOpnd:$addr),
+ !strconcat(instr_asm, "\t$rt, $addr"),
+ [(OpNode RC:$rt, addr:$addr)], IIStore> {
+ let isPseudo = Pseudo;
+}
+
+// 32-bit load.
+multiclass LoadM32<bits<6> op, string instr_asm, PatFrag OpNode,
+ bit Pseudo = 0> {
+ def #NAME# : LoadM<op, instr_asm, OpNode, CPURegs, mem, Pseudo>,
+ Requires<[NotN64]>;
+ def _P8 : LoadM<op, instr_asm, OpNode, CPURegs, mem64, Pseudo>,
+ Requires<[IsN64]>;
+}
+
+// 64-bit load.
+multiclass LoadM64<bits<6> op, string instr_asm, PatFrag OpNode,
+ bit Pseudo = 0> {
+ def #NAME# : LoadM<op, instr_asm, OpNode, CPU64Regs, mem, Pseudo>,
+ Requires<[NotN64]>;
+ def _P8 : LoadM<op, instr_asm, OpNode, CPU64Regs, mem64, Pseudo>,
+ Requires<[IsN64]>;
+}
+
+// 32-bit store.
+multiclass StoreM32<bits<6> op, string instr_asm, PatFrag OpNode,
+ bit Pseudo = 0> {
+ def #NAME# : StoreM<op, instr_asm, OpNode, CPURegs, mem, Pseudo>,
+ Requires<[NotN64]>;
+ def _P8 : StoreM<op, instr_asm, OpNode, CPURegs, mem64, Pseudo>,
+ Requires<[IsN64]>;
+}
+
+// 64-bit store.
+multiclass StoreM64<bits<6> op, string instr_asm, PatFrag OpNode,
+ bit Pseudo = 0> {
+ def #NAME# : StoreM<op, instr_asm, OpNode, CPU64Regs, mem, Pseudo>,
+ Requires<[NotN64]>;
+ def _P8 : StoreM<op, instr_asm, OpNode, CPU64Regs, mem64, Pseudo>,
+ Requires<[IsN64]>;
+}
// Conditional Branch
-let isBranch = 1, isTerminator=1, hasDelaySlot = 1 in {
-class CBranch<bits<6> op, string instr_asm, PatFrag cond_op>:
- FI<op, (outs), (ins CPURegs:$a, CPURegs:$b, brtarget:$offset),
- !strconcat(instr_asm, "\t$a, $b, $offset"),
- [(brcond (cond_op CPURegs:$a, CPURegs:$b), bb:$offset)],
- IIBranch>;
+class CBranch<bits<6> op, string instr_asm, PatFrag cond_op, RegisterClass RC>:
+ CBranchBase<op, (outs), (ins RC:$rs, RC:$rt, brtarget:$offset),
+ !strconcat(instr_asm, "\t$rs, $rt, $offset"),
+ [(brcond (i32 (cond_op RC:$rs, RC:$rt)), bb:$offset)], IIBranch> {
+ let isBranch = 1;
+ let isTerminator = 1;
+ let hasDelaySlot = 1;
+}
-class CBranchZero<bits<6> op, string instr_asm, PatFrag cond_op>:
- FI<op, (outs), (ins CPURegs:$src, brtarget:$offset),
- !strconcat(instr_asm, "\t$src, $offset"),
- [(brcond (cond_op CPURegs:$src, 0), bb:$offset)],
- IIBranch>;
+class CBranchZero<bits<6> op, bits<5> _rt, string instr_asm, PatFrag cond_op,
+ RegisterClass RC>:
+ CBranchBase<op, (outs), (ins RC:$rs, brtarget:$offset),
+ !strconcat(instr_asm, "\t$rs, $offset"),
+ [(brcond (i32 (cond_op RC:$rs, 0)), bb:$offset)], IIBranch> {
+ let rt = _rt;
+ let isBranch = 1;
+ let isTerminator = 1;
+ let hasDelaySlot = 1;
}
// SetCC
-class SetCC_R<bits<6> op, bits<6> func, string instr_asm,
- PatFrag cond_op>:
- FR<op, func, (outs CPURegs:$dst), (ins CPURegs:$b, CPURegs:$c),
- !strconcat(instr_asm, "\t$dst, $b, $c"),
- [(set CPURegs:$dst, (cond_op CPURegs:$b, CPURegs:$c))],
- IIAlu>;
+class SetCC_R<bits<6> op, bits<6> func, string instr_asm, PatFrag cond_op,
+ RegisterClass RC>:
+ FR<op, func, (outs CPURegs:$rd), (ins RC:$rs, RC:$rt),
+ !strconcat(instr_asm, "\t$rd, $rs, $rt"),
+ [(set CPURegs:$rd, (cond_op RC:$rs, RC:$rt))],
+ IIAlu> {
+ let shamt = 0;
+}
-class SetCC_I<bits<6> op, string instr_asm, PatFrag cond_op,
- Operand Od, PatLeaf imm_type>:
- FI<op, (outs CPURegs:$dst), (ins CPURegs:$b, Od:$c),
- !strconcat(instr_asm, "\t$dst, $b, $c"),
- [(set CPURegs:$dst, (cond_op CPURegs:$b, imm_type:$c))],
+class SetCC_I<bits<6> op, string instr_asm, PatFrag cond_op, Operand Od,
+ PatLeaf imm_type, RegisterClass RC>:
+ FI<op, (outs CPURegs:$rd), (ins RC:$rs, Od:$i),
+ !strconcat(instr_asm, "\t$rd, $rs, $i"),
+ [(set CPURegs:$rd, (cond_op RC:$rs, imm_type:$i))],
IIAlu>;
// Unconditional branch
@@ -310,8 +424,12 @@ class JumpFJ<bits<6> op, string instr_asm>:
let isBranch=1, isTerminator=1, isBarrier=1, rd=0, hasDelaySlot = 1 in
class JumpFR<bits<6> op, bits<6> func, string instr_asm>:
- FR<op, func, (outs), (ins CPURegs:$target),
- !strconcat(instr_asm, "\t$target"), [(brind CPURegs:$target)], IIBranch>;
+ FR<op, func, (outs), (ins CPURegs:$rs),
+ !strconcat(instr_asm, "\t$rs"), [(brind CPURegs:$rs)], IIBranch> {
+ let rt = 0;
+ let rd = 0;
+ let shamt = 0;
+}
// Jump and Link (Call)
let isCall=1, hasDelaySlot=1,
@@ -323,76 +441,124 @@ let isCall=1, hasDelaySlot=1,
!strconcat(instr_asm, "\t$target"), [(MipsJmpLink imm:$target)],
IIBranch>;
- let rd=31 in
class JumpLinkReg<bits<6> op, bits<6> func, string instr_asm>:
FR<op, func, (outs), (ins CPURegs:$rs, variable_ops),
- !strconcat(instr_asm, "\t$rs"), [(MipsJmpLink CPURegs:$rs)], IIBranch>;
+ !strconcat(instr_asm, "\t$rs"), [(MipsJmpLink CPURegs:$rs)], IIBranch> {
+ let rt = 0;
+ let rd = 31;
+ let shamt = 0;
+ }
class BranchLink<string instr_asm>:
FI<0x1, (outs), (ins CPURegs:$rs, brtarget:$target, variable_ops),
- !strconcat(instr_asm, "\t$rs, $target"), [], IIBranch>;
+ !strconcat(instr_asm, "\t$rs, $target"), [], IIBranch> {
+ let rt = 0;
+ }
}
// Mul, Div
-let Defs = [HI, LO] in {
- let isCommutable = 1 in
- class Mul<bits<6> func, string instr_asm, InstrItinClass itin>:
- FR<0x00, func, (outs), (ins CPURegs:$a, CPURegs:$b),
- !strconcat(instr_asm, "\t$a, $b"), [], itin>;
+class Mul<bits<6> func, string instr_asm, InstrItinClass itin>:
+ FR<0x00, func, (outs), (ins CPURegs:$rs, CPURegs:$rt),
+ !strconcat(instr_asm, "\t$rs, $rt"), [], itin> {
+ let rd = 0;
+ let shamt = 0;
+ let isCommutable = 1;
+ let Defs = [HI, LO];
+}
- class Div<SDNode op, bits<6> func, string instr_asm, InstrItinClass itin>:
- FR<0x00, func, (outs), (ins CPURegs:$a, CPURegs:$b),
- !strconcat(instr_asm, "\t$$zero, $a, $b"),
- [(op CPURegs:$a, CPURegs:$b)], itin>;
+class Div<SDNode op, bits<6> func, string instr_asm, InstrItinClass itin>:
+ FR<0x00, func, (outs), (ins CPURegs:$rs, CPURegs:$rt),
+ !strconcat(instr_asm, "\t$$zero, $rs, $rt"),
+ [(op CPURegs:$rs, CPURegs:$rt)], itin> {
+ let rd = 0;
+ let shamt = 0;
+ let Defs = [HI, LO];
}
// Move from Hi/Lo
class MoveFromLOHI<bits<6> func, string instr_asm>:
- FR<0x00, func, (outs CPURegs:$dst), (ins),
- !strconcat(instr_asm, "\t$dst"), [], IIHiLo>;
+ FR<0x00, func, (outs CPURegs:$rd), (ins),
+ !strconcat(instr_asm, "\t$rd"), [], IIHiLo> {
+ let rs = 0;
+ let rt = 0;
+ let shamt = 0;
+}
class MoveToLOHI<bits<6> func, string instr_asm>:
- FR<0x00, func, (outs), (ins CPURegs:$src),
- !strconcat(instr_asm, "\t$src"), [], IIHiLo>;
+ FR<0x00, func, (outs), (ins CPURegs:$rs),
+ !strconcat(instr_asm, "\t$rs"), [], IIHiLo> {
+ let rt = 0;
+ let rd = 0;
+ let shamt = 0;
+}
class EffectiveAddress<string instr_asm> :
- FI<0x09, (outs CPURegs:$dst), (ins mem_ea:$addr),
- instr_asm, [(set CPURegs:$dst, addr:$addr)], IIAlu>;
+ FI<0x09, (outs CPURegs:$rt), (ins mem_ea:$addr),
+ instr_asm, [(set CPURegs:$rt, addr:$addr)], IIAlu>;
// Count Leading Ones/Zeros in Word
class CountLeading<bits<6> func, string instr_asm, list<dag> pattern>:
- FR<0x1c, func, (outs CPURegs:$dst), (ins CPURegs:$src),
- !strconcat(instr_asm, "\t$dst, $src"), pattern, IIAlu>,
+ FR<0x1c, func, (outs CPURegs:$rd), (ins CPURegs:$rs),
+ !strconcat(instr_asm, "\t$rd, $rs"), pattern, IIAlu>,
Requires<[HasBitCount]> {
let shamt = 0;
let rt = rd;
}
// Sign Extend in Register.
-class SignExtInReg<bits<6> func, string instr_asm, ValueType vt>:
- FR<0x3f, func, (outs CPURegs:$dst), (ins CPURegs:$src),
- !strconcat(instr_asm, "\t$dst, $src"),
- [(set CPURegs:$dst, (sext_inreg CPURegs:$src, vt))], NoItinerary>;
+class SignExtInReg<bits<5> sa, string instr_asm, ValueType vt>:
+ FR<0x3f, 0x20, (outs CPURegs:$rd), (ins CPURegs:$rt),
+ !strconcat(instr_asm, "\t$rd, $rt"),
+ [(set CPURegs:$rd, (sext_inreg CPURegs:$rt, vt))], NoItinerary> {
+ let rs = 0;
+ let shamt = sa;
+ let Predicates = [HasSEInReg];
+}
// Byte Swap
-class ByteSwap<bits<6> func, string instr_asm>:
- FR<0x1f, func, (outs CPURegs:$dst), (ins CPURegs:$src),
- !strconcat(instr_asm, "\t$dst, $src"),
- [(set CPURegs:$dst, (bswap CPURegs:$src))], NoItinerary>;
-
-// Conditional Move
-class CondMov<bits<6> func, string instr_asm, PatLeaf MovCode>:
- FR<0x00, func, (outs CPURegs:$dst), (ins CPURegs:$F, CPURegs:$T,
- CPURegs:$cond), !strconcat(instr_asm, "\t$dst, $T, $cond"),
- [], NoItinerary>;
+class ByteSwap<bits<6> func, bits<5> sa, string instr_asm>:
+ FR<0x1f, func, (outs CPURegs:$rd), (ins CPURegs:$rt),
+ !strconcat(instr_asm, "\t$rd, $rt"),
+ [(set CPURegs:$rd, (bswap CPURegs:$rt))], NoItinerary> {
+ let rs = 0;
+ let shamt = sa;
+ let Predicates = [HasSwap];
+}
// Read Hardware
-class ReadHardware: FR<0x1f, 0x3b, (outs CPURegs:$dst), (ins HWRegs:$src),
- "rdhwr\t$dst, $src", [], IIAlu> {
+class ReadHardware: FR<0x1f, 0x3b, (outs CPURegs:$rt), (ins HWRegs:$rd),
+ "rdhwr\t$rt, $rd", [], IIAlu> {
let rs = 0;
let shamt = 0;
}
+// Ext and Ins
+class ExtIns<bits<6> _funct, string instr_asm, dag outs, dag ins,
+ list<dag> pattern, InstrItinClass itin>:
+ FR<0x1f, _funct, outs, ins, !strconcat(instr_asm, " $rt, $rs, $pos, $sz"),
+ pattern, itin>, Requires<[HasMips32r2]> {
+ bits<5> pos;
+ bits<5> sz;
+ let rd = sz;
+ let shamt = pos;
+}
+
+// Atomic instructions with 2 source operands (ATOMIC_SWAP & ATOMIC_LOAD_*).
+class Atomic2Ops<PatFrag Op, string Opstr> :
+ MipsPseudo<(outs CPURegs:$dst), (ins CPURegs:$ptr, CPURegs:$incr),
+ !strconcat("atomic_", Opstr, "\t$dst, $ptr, $incr"),
+ [(set CPURegs:$dst,
+ (Op CPURegs:$ptr, CPURegs:$incr))]>;
+
+// Atomic Compare & Swap.
+class AtomicCmpSwap<PatFrag Op, string Width> :
+ MipsPseudo<(outs CPURegs:$dst),
+ (ins CPURegs:$ptr, CPURegs:$cmp, CPURegs:$swap),
+ !strconcat("atomic_cmp_swap_", Width,
+ "\t$dst, $ptr, $cmp, $swap"),
+ [(set CPURegs:$dst,
+ (Op CPURegs:$ptr, CPURegs:$cmp, CPURegs:$swap))]>;
+
//===----------------------------------------------------------------------===//
// Pseudo instructions
//===----------------------------------------------------------------------===//
@@ -427,112 +593,32 @@ def CPLOAD : MipsPseudo<(outs), (ins CPURegs:$picreg), ".cpload\t$picreg", []>;
def CPRESTORE : MipsPseudo<(outs), (ins i32imm:$loc), ".cprestore\t$loc", []>;
let usesCustomInserter = 1 in {
- def ATOMIC_LOAD_ADD_I8 : MipsPseudo<
- (outs CPURegs:$dst), (ins CPURegs:$ptr, CPURegs:$incr),
- "atomic_load_add_8\t$dst, $ptr, $incr",
- [(set CPURegs:$dst, (atomic_load_add_8 CPURegs:$ptr, CPURegs:$incr))]>;
- def ATOMIC_LOAD_ADD_I16 : MipsPseudo<
- (outs CPURegs:$dst), (ins CPURegs:$ptr, CPURegs:$incr),
- "atomic_load_add_16\t$dst, $ptr, $incr",
- [(set CPURegs:$dst, (atomic_load_add_16 CPURegs:$ptr, CPURegs:$incr))]>;
- def ATOMIC_LOAD_ADD_I32 : MipsPseudo<
- (outs CPURegs:$dst), (ins CPURegs:$ptr, CPURegs:$incr),
- "atomic_load_add_32\t$dst, $ptr, $incr",
- [(set CPURegs:$dst, (atomic_load_add_32 CPURegs:$ptr, CPURegs:$incr))]>;
-
- def ATOMIC_LOAD_SUB_I8 : MipsPseudo<
- (outs CPURegs:$dst), (ins CPURegs:$ptr, CPURegs:$incr),
- "atomic_load_sub_8\t$dst, $ptr, $incr",
- [(set CPURegs:$dst, (atomic_load_sub_8 CPURegs:$ptr, CPURegs:$incr))]>;
- def ATOMIC_LOAD_SUB_I16 : MipsPseudo<
- (outs CPURegs:$dst), (ins CPURegs:$ptr, CPURegs:$incr),
- "atomic_load_sub_16\t$dst, $ptr, $incr",
- [(set CPURegs:$dst, (atomic_load_sub_16 CPURegs:$ptr, CPURegs:$incr))]>;
- def ATOMIC_LOAD_SUB_I32 : MipsPseudo<
- (outs CPURegs:$dst), (ins CPURegs:$ptr, CPURegs:$incr),
- "atomic_load_sub_32\t$dst, $ptr, $incr",
- [(set CPURegs:$dst, (atomic_load_sub_32 CPURegs:$ptr, CPURegs:$incr))]>;
-
- def ATOMIC_LOAD_AND_I8 : MipsPseudo<
- (outs CPURegs:$dst), (ins CPURegs:$ptr, CPURegs:$incr),
- "atomic_load_and_8\t$dst, $ptr, $incr",
- [(set CPURegs:$dst, (atomic_load_and_8 CPURegs:$ptr, CPURegs:$incr))]>;
- def ATOMIC_LOAD_AND_I16 : MipsPseudo<
- (outs CPURegs:$dst), (ins CPURegs:$ptr, CPURegs:$incr),
- "atomic_load_and_16\t$dst, $ptr, $incr",
- [(set CPURegs:$dst, (atomic_load_and_16 CPURegs:$ptr, CPURegs:$incr))]>;
- def ATOMIC_LOAD_AND_I32 : MipsPseudo<
- (outs CPURegs:$dst), (ins CPURegs:$ptr, CPURegs:$incr),
- "atomic_load_and_32\t$dst, $ptr, $incr",
- [(set CPURegs:$dst, (atomic_load_and_32 CPURegs:$ptr, CPURegs:$incr))]>;
-
- def ATOMIC_LOAD_OR_I8 : MipsPseudo<
- (outs CPURegs:$dst), (ins CPURegs:$ptr, CPURegs:$incr),
- "atomic_load_or_8\t$dst, $ptr, $incr",
- [(set CPURegs:$dst, (atomic_load_or_8 CPURegs:$ptr, CPURegs:$incr))]>;
- def ATOMIC_LOAD_OR_I16 : MipsPseudo<
- (outs CPURegs:$dst), (ins CPURegs:$ptr, CPURegs:$incr),
- "atomic_load_or_16\t$dst, $ptr, $incr",
- [(set CPURegs:$dst, (atomic_load_or_16 CPURegs:$ptr, CPURegs:$incr))]>;
- def ATOMIC_LOAD_OR_I32 : MipsPseudo<
- (outs CPURegs:$dst), (ins CPURegs:$ptr, CPURegs:$incr),
- "atomic_load_or_32\t$dst, $ptr, $incr",
- [(set CPURegs:$dst, (atomic_load_or_32 CPURegs:$ptr, CPURegs:$incr))]>;
-
- def ATOMIC_LOAD_XOR_I8 : MipsPseudo<
- (outs CPURegs:$dst), (ins CPURegs:$ptr, CPURegs:$incr),
- "atomic_load_xor_8\t$dst, $ptr, $incr",
- [(set CPURegs:$dst, (atomic_load_xor_8 CPURegs:$ptr, CPURegs:$incr))]>;
- def ATOMIC_LOAD_XOR_I16 : MipsPseudo<
- (outs CPURegs:$dst), (ins CPURegs:$ptr, CPURegs:$incr),
- "atomic_load_xor_16\t$dst, $ptr, $incr",
- [(set CPURegs:$dst, (atomic_load_xor_16 CPURegs:$ptr, CPURegs:$incr))]>;
- def ATOMIC_LOAD_XOR_I32 : MipsPseudo<
- (outs CPURegs:$dst), (ins CPURegs:$ptr, CPURegs:$incr),
- "atomic_load_xor_32\t$dst, $ptr, $incr",
- [(set CPURegs:$dst, (atomic_load_xor_32 CPURegs:$ptr, CPURegs:$incr))]>;
-
- def ATOMIC_LOAD_NAND_I8 : MipsPseudo<
- (outs CPURegs:$dst), (ins CPURegs:$ptr, CPURegs:$incr),
- "atomic_load_nand_8\t$dst, $ptr, $incr",
- [(set CPURegs:$dst, (atomic_load_nand_8 CPURegs:$ptr, CPURegs:$incr))]>;
- def ATOMIC_LOAD_NAND_I16 : MipsPseudo<
- (outs CPURegs:$dst), (ins CPURegs:$ptr, CPURegs:$incr),
- "atomic_load_nand_16\t$dst, $ptr, $incr",
- [(set CPURegs:$dst, (atomic_load_nand_16 CPURegs:$ptr, CPURegs:$incr))]>;
- def ATOMIC_LOAD_NAND_I32 : MipsPseudo<
- (outs CPURegs:$dst), (ins CPURegs:$ptr, CPURegs:$incr),
- "atomic_load_nand_32\t$dst, $ptr, $incr",
- [(set CPURegs:$dst, (atomic_load_nand_32 CPURegs:$ptr, CPURegs:$incr))]>;
-
- def ATOMIC_SWAP_I8 : MipsPseudo<
- (outs CPURegs:$dst), (ins CPURegs:$ptr, CPURegs:$val),
- "atomic_swap_8\t$dst, $ptr, $val",
- [(set CPURegs:$dst, (atomic_swap_8 CPURegs:$ptr, CPURegs:$val))]>;
- def ATOMIC_SWAP_I16 : MipsPseudo<
- (outs CPURegs:$dst), (ins CPURegs:$ptr, CPURegs:$val),
- "atomic_swap_16\t$dst, $ptr, $val",
- [(set CPURegs:$dst, (atomic_swap_16 CPURegs:$ptr, CPURegs:$val))]>;
- def ATOMIC_SWAP_I32 : MipsPseudo<
- (outs CPURegs:$dst), (ins CPURegs:$ptr, CPURegs:$val),
- "atomic_swap_32\t$dst, $ptr, $val",
- [(set CPURegs:$dst, (atomic_swap_32 CPURegs:$ptr, CPURegs:$val))]>;
-
- def ATOMIC_CMP_SWAP_I8 : MipsPseudo<
- (outs CPURegs:$dst), (ins CPURegs:$ptr, CPURegs:$oldval, CPURegs:$newval),
- "atomic_cmp_swap_8\t$dst, $ptr, $oldval, $newval",
- [(set CPURegs:$dst,
- (atomic_cmp_swap_8 CPURegs:$ptr, CPURegs:$oldval, CPURegs:$newval))]>;
- def ATOMIC_CMP_SWAP_I16 : MipsPseudo<
- (outs CPURegs:$dst), (ins CPURegs:$ptr, CPURegs:$oldval, CPURegs:$newval),
- "atomic_cmp_swap_16\t$dst, $ptr, $oldval, $newval",
- [(set CPURegs:$dst,
- (atomic_cmp_swap_16 CPURegs:$ptr, CPURegs:$oldval, CPURegs:$newval))]>;
- def ATOMIC_CMP_SWAP_I32 : MipsPseudo<
- (outs CPURegs:$dst), (ins CPURegs:$ptr, CPURegs:$oldval, CPURegs:$newval),
- "atomic_cmp_swap_32\t$dst, $ptr, $oldval, $newval",
- [(set CPURegs:$dst,
- (atomic_cmp_swap_32 CPURegs:$ptr, CPURegs:$oldval, CPURegs:$newval))]>;
+ def ATOMIC_LOAD_ADD_I8 : Atomic2Ops<atomic_load_add_8, "load_add_8">;
+ def ATOMIC_LOAD_ADD_I16 : Atomic2Ops<atomic_load_add_16, "load_add_16">;
+ def ATOMIC_LOAD_ADD_I32 : Atomic2Ops<atomic_load_add_32, "load_add_32">;
+ def ATOMIC_LOAD_SUB_I8 : Atomic2Ops<atomic_load_sub_8, "load_sub_8">;
+ def ATOMIC_LOAD_SUB_I16 : Atomic2Ops<atomic_load_sub_16, "load_sub_16">;
+ def ATOMIC_LOAD_SUB_I32 : Atomic2Ops<atomic_load_sub_32, "load_sub_32">;
+ def ATOMIC_LOAD_AND_I8 : Atomic2Ops<atomic_load_and_8, "load_and_8">;
+ def ATOMIC_LOAD_AND_I16 : Atomic2Ops<atomic_load_and_16, "load_and_16">;
+ def ATOMIC_LOAD_AND_I32 : Atomic2Ops<atomic_load_and_32, "load_and_32">;
+ def ATOMIC_LOAD_OR_I8 : Atomic2Ops<atomic_load_or_8, "load_or_8">;
+ def ATOMIC_LOAD_OR_I16 : Atomic2Ops<atomic_load_or_16, "load_or_16">;
+ def ATOMIC_LOAD_OR_I32 : Atomic2Ops<atomic_load_or_32, "load_or_32">;
+ def ATOMIC_LOAD_XOR_I8 : Atomic2Ops<atomic_load_xor_8, "load_xor_8">;
+ def ATOMIC_LOAD_XOR_I16 : Atomic2Ops<atomic_load_xor_16, "load_xor_16">;
+ def ATOMIC_LOAD_XOR_I32 : Atomic2Ops<atomic_load_xor_32, "load_xor_32">;
+ def ATOMIC_LOAD_NAND_I8 : Atomic2Ops<atomic_load_nand_8, "load_nand_8">;
+ def ATOMIC_LOAD_NAND_I16 : Atomic2Ops<atomic_load_nand_16, "load_nand_16">;
+ def ATOMIC_LOAD_NAND_I32 : Atomic2Ops<atomic_load_nand_32, "load_nand_32">;
+
+ def ATOMIC_SWAP_I8 : Atomic2Ops<atomic_swap_8, "swap_8">;
+ def ATOMIC_SWAP_I16 : Atomic2Ops<atomic_swap_16, "swap_16">;
+ def ATOMIC_SWAP_I32 : Atomic2Ops<atomic_swap_32, "swap_32">;
+
+ def ATOMIC_CMP_SWAP_I8 : AtomicCmpSwap<atomic_cmp_swap_8, "8">;
+ def ATOMIC_CMP_SWAP_I16 : AtomicCmpSwap<atomic_cmp_swap_16, "16">;
+ def ATOMIC_CMP_SWAP_I32 : AtomicCmpSwap<atomic_cmp_swap_32, "32">;
}
//===----------------------------------------------------------------------===//
@@ -544,26 +630,26 @@ let usesCustomInserter = 1 in {
//===----------------------------------------------------------------------===//
/// Arithmetic Instructions (ALU Immediate)
-def ADDiu : ArithI<0x09, "addiu", add, simm16, immSExt16>;
-def ADDi : ArithOverflowI<0x08, "addi", add, simm16, immSExt16>;
-def SLTi : SetCC_I<0x0a, "slti", setlt, simm16, immSExt16>;
-def SLTiu : SetCC_I<0x0b, "sltiu", setult, simm16, immSExt16>;
-def ANDi : LogicI<0x0c, "andi", and>;
-def ORi : LogicI<0x0d, "ori", or>;
-def XORi : LogicI<0x0e, "xori", xor>;
+def ADDiu : ArithLogicI<0x09, "addiu", add, simm16, immSExt16, CPURegs>;
+def ADDi : ArithOverflowI<0x08, "addi", add, simm16, immSExt16, CPURegs>;
+def SLTi : SetCC_I<0x0a, "slti", setlt, simm16, immSExt16, CPURegs>;
+def SLTiu : SetCC_I<0x0b, "sltiu", setult, simm16, immSExt16, CPURegs>;
+def ANDi : ArithLogicI<0x0c, "andi", and, uimm16, immZExt16, CPURegs>;
+def ORi : ArithLogicI<0x0d, "ori", or, uimm16, immZExt16, CPURegs>;
+def XORi : ArithLogicI<0x0e, "xori", xor, uimm16, immZExt16, CPURegs>;
def LUi : LoadUpper<0x0f, "lui">;
/// Arithmetic Instructions (3-Operand, R-Type)
-def ADDu : ArithR<0x00, 0x21, "addu", add, IIAlu, 1>;
-def SUBu : ArithR<0x00, 0x23, "subu", sub, IIAlu>;
-def ADD : ArithOverflowR<0x00, 0x20, "add", 1>;
-def SUB : ArithOverflowR<0x00, 0x22, "sub">;
-def SLT : SetCC_R<0x00, 0x2a, "slt", setlt>;
-def SLTu : SetCC_R<0x00, 0x2b, "sltu", setult>;
-def AND : LogicR<0x24, "and", and>;
-def OR : LogicR<0x25, "or", or>;
-def XOR : LogicR<0x26, "xor", xor>;
-def NOR : LogicNOR<0x00, 0x27, "nor">;
+def ADDu : ArithLogicR<0x00, 0x21, "addu", add, IIAlu, CPURegs, 1>;
+def SUBu : ArithLogicR<0x00, 0x23, "subu", sub, IIAlu, CPURegs>;
+def ADD : ArithOverflowR<0x00, 0x20, "add", IIAlu, CPURegs, 1>;
+def SUB : ArithOverflowR<0x00, 0x22, "sub", IIAlu, CPURegs>;
+def SLT : SetCC_R<0x00, 0x2a, "slt", setlt, CPURegs>;
+def SLTu : SetCC_R<0x00, 0x2b, "sltu", setult, CPURegs>;
+def AND : ArithLogicR<0x00, 0x24, "and", and, IIAlu, CPURegs, 1>;
+def OR : ArithLogicR<0x00, 0x25, "or", or, IIAlu, CPURegs, 1>;
+def XOR : ArithLogicR<0x00, 0x26, "xor", xor, IIAlu, CPURegs, 1>;
+def NOR : LogicNOR<0x00, 0x27, "nor", CPURegs>;
/// Shift Instructions
def SLL : LogicR_shift_rotate_imm<0x00, 0x00, "sll", shl>;
@@ -574,45 +660,58 @@ def SRLV : LogicR_shift_rotate_reg<0x06, 0x00, "srlv", srl>;
def SRAV : LogicR_shift_rotate_reg<0x07, 0x00, "srav", sra>;
// Rotate Instructions
-let Predicates = [IsMips32r2] in {
+let Predicates = [HasMips32r2] in {
def ROTR : LogicR_shift_rotate_imm<0x02, 0x01, "rotr", rotr>;
def ROTRV : LogicR_shift_rotate_reg<0x06, 0x01, "rotrv", rotr>;
}
/// Load and Store Instructions
-def LB : LoadM<0x20, "lb", sextloadi8>;
-def LBu : LoadM<0x24, "lbu", zextloadi8>;
-def LH : LoadM<0x21, "lh", sextloadi16>;
-def LHu : LoadM<0x25, "lhu", zextloadi16>;
-def LW : LoadM<0x23, "lw", load>;
-def SB : StoreM<0x28, "sb", truncstorei8>;
-def SH : StoreM<0x29, "sh", truncstorei16>;
-def SW : StoreM<0x2b, "sw", store>;
+/// aligned
+defm LB : LoadM32<0x20, "lb", sextloadi8>;
+defm LBu : LoadM32<0x24, "lbu", zextloadi8>;
+defm LH : LoadM32<0x21, "lh", sextloadi16_a>;
+defm LHu : LoadM32<0x25, "lhu", zextloadi16_a>;
+defm LW : LoadM32<0x23, "lw", load_a>;
+defm SB : StoreM32<0x28, "sb", truncstorei8>;
+defm SH : StoreM32<0x29, "sh", truncstorei16_a>;
+defm SW : StoreM32<0x2b, "sw", store_a>;
+
+/// unaligned
+defm ULH : LoadM32<0x21, "ulh", sextloadi16_u, 1>;
+defm ULHu : LoadM32<0x25, "ulhu", zextloadi16_u, 1>;
+defm ULW : LoadM32<0x23, "ulw", load_u, 1>;
+defm USH : StoreM32<0x29, "ush", truncstorei16_u, 1>;
+defm USW : StoreM32<0x2b, "usw", store_u, 1>;
+
+let hasSideEffects = 1 in
+def SYNC : MipsInst<(outs), (ins i32imm:$stype), "sync $stype",
+ [(MipsSync imm:$stype)], NoItinerary>
+{
+ let opcode = 0;
+ let Inst{25-11} = 0;
+ let Inst{5-0} = 15;
+}
/// Load-linked, Store-conditional
-let hasDelaySlot = 1 in
+let mayLoad = 1 in
def LL : FI<0x30, (outs CPURegs:$dst), (ins mem:$addr),
"ll\t$dst, $addr", [], IILoad>;
-let Constraints = "$src = $dst" in
+let mayStore = 1, Constraints = "$src = $dst" in
def SC : FI<0x38, (outs CPURegs:$dst), (ins CPURegs:$src, mem:$addr),
"sc\t$src, $addr", [], IIStore>;
/// Jump and Branch Instructions
def J : JumpFJ<0x02, "j">;
-def JR : JumpFR<0x00, 0x08, "jr">;
+let isIndirectBranch = 1 in
+ def JR : JumpFR<0x00, 0x08, "jr">;
def JAL : JumpLink<0x03, "jal">;
def JALR : JumpLinkReg<0x00, 0x09, "jalr">;
-def BEQ : CBranch<0x04, "beq", seteq>;
-def BNE : CBranch<0x05, "bne", setne>;
-
-let rt=1 in
- def BGEZ : CBranchZero<0x01, "bgez", setge>;
-
-let rt=0 in {
- def BGTZ : CBranchZero<0x07, "bgtz", setgt>;
- def BLEZ : CBranchZero<0x07, "blez", setle>;
- def BLTZ : CBranchZero<0x01, "bltz", setlt>;
-}
+def BEQ : CBranch<0x04, "beq", seteq, CPURegs>;
+def BNE : CBranch<0x05, "bne", setne, CPURegs>;
+def BGEZ : CBranchZero<0x01, 1, "bgez", setge, CPURegs>;
+def BGTZ : CBranchZero<0x07, 0, "bgtz", setgt, CPURegs>;
+def BLEZ : CBranchZero<0x07, 0, "blez", setle, CPURegs>;
+def BLTZ : CBranchZero<0x01, 0, "bltz", setlt, CPURegs>;
def BGEZAL : BranchLink<"bgezal">;
def BLTZAL : BranchLink<"bltzal">;
@@ -639,40 +738,31 @@ let Uses = [LO] in
def MFLO : MoveFromLOHI<0x12, "mflo">;
/// Sign Ext In Register Instructions.
-let Predicates = [HasSEInReg] in {
- let shamt = 0x10, rs = 0 in
- def SEB : SignExtInReg<0x21, "seb", i8>;
-
- let shamt = 0x18, rs = 0 in
- def SEH : SignExtInReg<0x20, "seh", i16>;
-}
+def SEB : SignExtInReg<0x10, "seb", i8>;
+def SEH : SignExtInReg<0x18, "seh", i16>;
/// Count Leading
-def CLZ : CountLeading<0b100000, "clz",
- [(set CPURegs:$dst, (ctlz CPURegs:$src))]>;
-def CLO : CountLeading<0b100001, "clo",
- [(set CPURegs:$dst, (ctlz (not CPURegs:$src)))]>;
+def CLZ : CountLeading<0x20, "clz",
+ [(set CPURegs:$rd, (ctlz CPURegs:$rs))]>;
+def CLO : CountLeading<0x21, "clo",
+ [(set CPURegs:$rd, (ctlz (not CPURegs:$rs)))]>;
/// Byte Swap
-let Predicates = [HasSwap] in {
- let shamt = 0x3, rs = 0 in
- def WSBW : ByteSwap<0x20, "wsbw">;
-}
-
-/// Conditional Move
-def MIPS_CMOV_ZERO : PatLeaf<(i32 0)>;
-def MIPS_CMOV_NZERO : PatLeaf<(i32 1)>;
+def WSBW : ByteSwap<0x20, 0x2, "wsbw">;
// Conditional moves:
// These instructions are expanded in
// MipsISelLowering::EmitInstrWithCustomInserter if target does not have
// conditional move instructions.
// flag:int, data:int
-let usesCustomInserter = 1, shamt = 0, Constraints = "$F = $dst" in
- class CondMovIntInt<bits<6> funct, string instr_asm> :
- FR<0, funct, (outs CPURegs:$dst),
- (ins CPURegs:$T, CPURegs:$cond, CPURegs:$F),
- !strconcat(instr_asm, "\t$dst, $T, $cond"), [], NoItinerary>;
+class CondMovIntInt<bits<6> funct, string instr_asm> :
+ FR<0, funct, (outs CPURegs:$rd),
+ (ins CPURegs:$rs, CPURegs:$rt, CPURegs:$F),
+ !strconcat(instr_asm, "\t$rd, $rs, $rt"), [], NoItinerary> {
+ let shamt = 0;
+ let usesCustomInserter = 1;
+ let Constraints = "$F = $rd";
+}
def MOVZ_I : CondMovIntInt<0x0a, "movz">;
def MOVN_I : CondMovIntInt<0x0b, "movn">;
@@ -685,13 +775,13 @@ let addr=0 in
// instructions. The same not happens for stack address copies, so an
// add op with mem ComplexPattern is used and the stack address copy
// can be matched. It's similar to Sparc LEA_ADDRi
-def LEA_ADDiu : EffectiveAddress<"addiu\t$dst, $addr">;
+def LEA_ADDiu : EffectiveAddress<"addiu\t$rt, $addr">;
// DynAlloc node points to dynamically allocated stack space.
// $sp is added to the list of implicitly used registers to prevent dead code
// elimination from removing instructions that modify $sp.
let Uses = [SP] in
-def DynAlloc : EffectiveAddress<"addiu\t$dst, $addr">;
+def DynAlloc : EffectiveAddress<"addiu\t$rt, $addr">;
// MADD*/MSUB*
def MADD : MArithR<0, "madd", MipsMAdd, 1>;
@@ -701,10 +791,25 @@ def MSUBU : MArithR<5, "msubu", MipsMSubu>;
// MUL is a assembly macro in the current used ISAs. In recent ISA's
// it is a real instruction.
-def MUL : ArithR<0x1c, 0x02, "mul", mul, IIImul, 1>, Requires<[IsMips32]>;
+def MUL : ArithLogicR<0x1c, 0x02, "mul", mul, IIImul, CPURegs, 1>,
+ Requires<[HasMips32]>;
def RDHWR : ReadHardware;
+def EXT : ExtIns<0, "ext", (outs CPURegs:$rt),
+ (ins CPURegs:$rs, uimm16:$pos, uimm16:$sz),
+ [(set CPURegs:$rt,
+ (MipsExt CPURegs:$rs, immZExt5:$pos, immZExt5:$sz))],
+ NoItinerary>;
+
+let Constraints = "$src = $rt" in
+def INS : ExtIns<4, "ins", (outs CPURegs:$rt),
+ (ins CPURegs:$rs, uimm16:$pos, uimm16:$sz, CPURegs:$src),
+ [(set CPURegs:$rt,
+ (MipsIns CPURegs:$rs, immZExt5:$pos, immZExt5:$sz,
+ CPURegs:$src))],
+ NoItinerary>;
+
//===----------------------------------------------------------------------===//
// Arbitrary patterns that map to one or more instructions
//===----------------------------------------------------------------------===//
@@ -738,16 +843,20 @@ def : Pat<(MipsJmpLink (i32 texternalsym:$dst)),
// hi/lo relocs
def : Pat<(MipsHi tglobaladdr:$in), (LUi tglobaladdr:$in)>;
def : Pat<(MipsHi tblockaddress:$in), (LUi tblockaddress:$in)>;
+def : Pat<(MipsLo tglobaladdr:$in), (ADDiu ZERO, tglobaladdr:$in)>;
+def : Pat<(MipsLo tblockaddress:$in), (ADDiu ZERO, tblockaddress:$in)>;
def : Pat<(add CPURegs:$hi, (MipsLo tglobaladdr:$lo)),
(ADDiu CPURegs:$hi, tglobaladdr:$lo)>;
def : Pat<(add CPURegs:$hi, (MipsLo tblockaddress:$lo)),
(ADDiu CPURegs:$hi, tblockaddress:$lo)>;
def : Pat<(MipsHi tjumptable:$in), (LUi tjumptable:$in)>;
+def : Pat<(MipsLo tjumptable:$in), (ADDiu ZERO, tjumptable:$in)>;
def : Pat<(add CPURegs:$hi, (MipsLo tjumptable:$lo)),
(ADDiu CPURegs:$hi, tjumptable:$lo)>;
def : Pat<(MipsHi tconstpool:$in), (LUi tconstpool:$in)>;
+def : Pat<(MipsLo tconstpool:$in), (ADDiu ZERO, tconstpool:$in)>;
def : Pat<(add CPURegs:$hi, (MipsLo tconstpool:$lo)),
(ADDiu CPURegs:$hi, tconstpool:$lo)>;
@@ -763,6 +872,7 @@ def : Pat<(add CPURegs:$gp, (MipsTlsGd tglobaltlsaddr:$in)),
// tprel hi/lo
def : Pat<(MipsTprelHi tglobaltlsaddr:$in), (LUi tglobaltlsaddr:$in)>;
+def : Pat<(MipsTprelLo tglobaltlsaddr:$in), (ADDiu ZERO, tglobaltlsaddr:$in)>;
def : Pat<(add CPURegs:$hi, (MipsTprelLo tglobaltlsaddr:$lo)),
(ADDiu CPURegs:$hi, tglobaltlsaddr:$lo)>;
@@ -784,60 +894,67 @@ def : Pat<(not CPURegs:$in),
// extended load and stores
def : Pat<(extloadi1 addr:$src), (LBu addr:$src)>;
def : Pat<(extloadi8 addr:$src), (LBu addr:$src)>;
-def : Pat<(extloadi16 addr:$src), (LHu addr:$src)>;
+def : Pat<(extloadi16_a addr:$src), (LHu addr:$src)>;
+def : Pat<(extloadi16_u addr:$src), (ULHu addr:$src)>;
// peepholes
def : Pat<(store (i32 0), addr:$dst), (SW ZERO, addr:$dst)>;
// brcond patterns
-def : Pat<(brcond (setne CPURegs:$lhs, 0), bb:$dst),
- (BNE CPURegs:$lhs, ZERO, bb:$dst)>;
-def : Pat<(brcond (seteq CPURegs:$lhs, 0), bb:$dst),
- (BEQ CPURegs:$lhs, ZERO, bb:$dst)>;
-
-def : Pat<(brcond (setge CPURegs:$lhs, CPURegs:$rhs), bb:$dst),
- (BEQ (SLT CPURegs:$lhs, CPURegs:$rhs), ZERO, bb:$dst)>;
-def : Pat<(brcond (setuge CPURegs:$lhs, CPURegs:$rhs), bb:$dst),
- (BEQ (SLTu CPURegs:$lhs, CPURegs:$rhs), ZERO, bb:$dst)>;
-def : Pat<(brcond (setge CPURegs:$lhs, immSExt16:$rhs), bb:$dst),
- (BEQ (SLTi CPURegs:$lhs, immSExt16:$rhs), ZERO, bb:$dst)>;
-def : Pat<(brcond (setuge CPURegs:$lhs, immSExt16:$rhs), bb:$dst),
- (BEQ (SLTiu CPURegs:$lhs, immSExt16:$rhs), ZERO, bb:$dst)>;
-
-def : Pat<(brcond (setle CPURegs:$lhs, CPURegs:$rhs), bb:$dst),
- (BEQ (SLT CPURegs:$rhs, CPURegs:$lhs), ZERO, bb:$dst)>;
-def : Pat<(brcond (setule CPURegs:$lhs, CPURegs:$rhs), bb:$dst),
- (BEQ (SLTu CPURegs:$rhs, CPURegs:$lhs), ZERO, bb:$dst)>;
-
-def : Pat<(brcond CPURegs:$cond, bb:$dst),
- (BNE CPURegs:$cond, ZERO, bb:$dst)>;
+multiclass BrcondPats<RegisterClass RC, Instruction BEQOp, Instruction BNEOp,
+ Instruction SLTOp, Instruction SLTuOp, Instruction SLTiOp,
+ Instruction SLTiuOp, Register ZEROReg> {
+def : Pat<(brcond (i32 (setne RC:$lhs, 0)), bb:$dst),
+ (BNEOp RC:$lhs, ZEROReg, bb:$dst)>;
+def : Pat<(brcond (i32 (seteq RC:$lhs, 0)), bb:$dst),
+ (BEQOp RC:$lhs, ZEROReg, bb:$dst)>;
+
+def : Pat<(brcond (i32 (setge RC:$lhs, RC:$rhs)), bb:$dst),
+ (BEQ (SLTOp RC:$lhs, RC:$rhs), ZERO, bb:$dst)>;
+def : Pat<(brcond (i32 (setuge RC:$lhs, RC:$rhs)), bb:$dst),
+ (BEQ (SLTuOp RC:$lhs, RC:$rhs), ZERO, bb:$dst)>;
+def : Pat<(brcond (i32 (setge RC:$lhs, immSExt16:$rhs)), bb:$dst),
+ (BEQ (SLTiOp RC:$lhs, immSExt16:$rhs), ZERO, bb:$dst)>;
+def : Pat<(brcond (i32 (setuge RC:$lhs, immSExt16:$rhs)), bb:$dst),
+ (BEQ (SLTiuOp RC:$lhs, immSExt16:$rhs), ZERO, bb:$dst)>;
+
+def : Pat<(brcond (i32 (setle RC:$lhs, RC:$rhs)), bb:$dst),
+ (BEQ (SLTOp RC:$rhs, RC:$lhs), ZERO, bb:$dst)>;
+def : Pat<(brcond (i32 (setule RC:$lhs, RC:$rhs)), bb:$dst),
+ (BEQ (SLTuOp RC:$rhs, RC:$lhs), ZERO, bb:$dst)>;
+
+def : Pat<(brcond RC:$cond, bb:$dst),
+ (BNEOp RC:$cond, ZEROReg, bb:$dst)>;
+}
+
+defm : BrcondPats<CPURegs, BEQ, BNE, SLT, SLTu, SLTi, SLTiu, ZERO>;
// select patterns
multiclass MovzPats<RegisterClass RC, Instruction MOVZInst> {
- def : Pat<(select (setge CPURegs:$lhs, CPURegs:$rhs), RC:$T, RC:$F),
+ def : Pat<(select (i32 (setge CPURegs:$lhs, CPURegs:$rhs)), RC:$T, RC:$F),
(MOVZInst RC:$T, (SLT CPURegs:$lhs, CPURegs:$rhs), RC:$F)>;
- def : Pat<(select (setuge CPURegs:$lhs, CPURegs:$rhs), RC:$T, RC:$F),
+ def : Pat<(select (i32 (setuge CPURegs:$lhs, CPURegs:$rhs)), RC:$T, RC:$F),
(MOVZInst RC:$T, (SLTu CPURegs:$lhs, CPURegs:$rhs), RC:$F)>;
- def : Pat<(select (setge CPURegs:$lhs, immSExt16:$rhs), RC:$T, RC:$F),
+ def : Pat<(select (i32 (setge CPURegs:$lhs, immSExt16:$rhs)), RC:$T, RC:$F),
(MOVZInst RC:$T, (SLTi CPURegs:$lhs, immSExt16:$rhs), RC:$F)>;
- def : Pat<(select (setuge CPURegs:$lh, immSExt16:$rh), RC:$T, RC:$F),
+ def : Pat<(select (i32 (setuge CPURegs:$lh, immSExt16:$rh)), RC:$T, RC:$F),
(MOVZInst RC:$T, (SLTiu CPURegs:$lh, immSExt16:$rh), RC:$F)>;
- def : Pat<(select (setle CPURegs:$lhs, CPURegs:$rhs), RC:$T, RC:$F),
+ def : Pat<(select (i32 (setle CPURegs:$lhs, CPURegs:$rhs)), RC:$T, RC:$F),
(MOVZInst RC:$T, (SLT CPURegs:$rhs, CPURegs:$lhs), RC:$F)>;
- def : Pat<(select (setule CPURegs:$lhs, CPURegs:$rhs), RC:$T, RC:$F),
+ def : Pat<(select (i32 (setule CPURegs:$lhs, CPURegs:$rhs)), RC:$T, RC:$F),
(MOVZInst RC:$T, (SLTu CPURegs:$rhs, CPURegs:$lhs), RC:$F)>;
- def : Pat<(select (seteq CPURegs:$lhs, CPURegs:$rhs), RC:$T, RC:$F),
+ def : Pat<(select (i32 (seteq CPURegs:$lhs, CPURegs:$rhs)), RC:$T, RC:$F),
(MOVZInst RC:$T, (XOR CPURegs:$lhs, CPURegs:$rhs), RC:$F)>;
- def : Pat<(select (seteq CPURegs:$lhs, 0), RC:$T, RC:$F),
+ def : Pat<(select (i32 (seteq CPURegs:$lhs, 0)), RC:$T, RC:$F),
(MOVZInst RC:$T, CPURegs:$lhs, RC:$F)>;
}
multiclass MovnPats<RegisterClass RC, Instruction MOVNInst> {
- def : Pat<(select (setne CPURegs:$lhs, CPURegs:$rhs), RC:$T, RC:$F),
+ def : Pat<(select (i32 (setne CPURegs:$lhs, CPURegs:$rhs)), RC:$T, RC:$F),
(MOVNInst RC:$T, (XOR CPURegs:$lhs, CPURegs:$rhs), RC:$F)>;
def : Pat<(select CPURegs:$cond, RC:$T, RC:$F),
(MOVNInst RC:$T, CPURegs:$cond, RC:$F)>;
- def : Pat<(select (setne CPURegs:$lhs, 0), RC:$T, RC:$F),
+ def : Pat<(select (i32 (setne CPURegs:$lhs, 0)), RC:$T, RC:$F),
(MOVNInst RC:$T, CPURegs:$lhs, RC:$F)>;
}
@@ -845,30 +962,48 @@ defm : MovzPats<CPURegs, MOVZ_I>;
defm : MovnPats<CPURegs, MOVN_I>;
// setcc patterns
-def : Pat<(seteq CPURegs:$lhs, CPURegs:$rhs),
- (SLTu (XOR CPURegs:$lhs, CPURegs:$rhs), 1)>;
-def : Pat<(setne CPURegs:$lhs, CPURegs:$rhs),
- (SLTu ZERO, (XOR CPURegs:$lhs, CPURegs:$rhs))>;
-
-def : Pat<(setle CPURegs:$lhs, CPURegs:$rhs),
- (XORi (SLT CPURegs:$rhs, CPURegs:$lhs), 1)>;
-def : Pat<(setule CPURegs:$lhs, CPURegs:$rhs),
- (XORi (SLTu CPURegs:$rhs, CPURegs:$lhs), 1)>;
-
-def : Pat<(setgt CPURegs:$lhs, CPURegs:$rhs),
- (SLT CPURegs:$rhs, CPURegs:$lhs)>;
-def : Pat<(setugt CPURegs:$lhs, CPURegs:$rhs),
- (SLTu CPURegs:$rhs, CPURegs:$lhs)>;
-
-def : Pat<(setge CPURegs:$lhs, CPURegs:$rhs),
- (XORi (SLT CPURegs:$lhs, CPURegs:$rhs), 1)>;
-def : Pat<(setuge CPURegs:$lhs, CPURegs:$rhs),
- (XORi (SLTu CPURegs:$lhs, CPURegs:$rhs), 1)>;
-
-def : Pat<(setge CPURegs:$lhs, immSExt16:$rhs),
- (XORi (SLTi CPURegs:$lhs, immSExt16:$rhs), 1)>;
-def : Pat<(setuge CPURegs:$lhs, immSExt16:$rhs),
- (XORi (SLTiu CPURegs:$lhs, immSExt16:$rhs), 1)>;
+multiclass SeteqPats<RegisterClass RC, Instruction SLTiuOp, Instruction XOROp,
+ Instruction SLTuOp, Register ZEROReg> {
+ def : Pat<(seteq RC:$lhs, RC:$rhs),
+ (SLTiuOp (XOROp RC:$lhs, RC:$rhs), 1)>;
+ def : Pat<(setne RC:$lhs, RC:$rhs),
+ (SLTuOp ZEROReg, (XOROp RC:$lhs, RC:$rhs))>;
+}
+
+multiclass SetlePats<RegisterClass RC, Instruction SLTOp, Instruction SLTuOp> {
+ def : Pat<(setle RC:$lhs, RC:$rhs),
+ (XORi (SLTOp RC:$rhs, RC:$lhs), 1)>;
+ def : Pat<(setule RC:$lhs, RC:$rhs),
+ (XORi (SLTuOp RC:$rhs, RC:$lhs), 1)>;
+}
+
+multiclass SetgtPats<RegisterClass RC, Instruction SLTOp, Instruction SLTuOp> {
+ def : Pat<(setgt RC:$lhs, RC:$rhs),
+ (SLTOp RC:$rhs, RC:$lhs)>;
+ def : Pat<(setugt RC:$lhs, RC:$rhs),
+ (SLTuOp RC:$rhs, RC:$lhs)>;
+}
+
+multiclass SetgePats<RegisterClass RC, Instruction SLTOp, Instruction SLTuOp> {
+ def : Pat<(setge RC:$lhs, RC:$rhs),
+ (XORi (SLTOp RC:$lhs, RC:$rhs), 1)>;
+ def : Pat<(setuge RC:$lhs, RC:$rhs),
+ (XORi (SLTuOp RC:$lhs, RC:$rhs), 1)>;
+}
+
+multiclass SetgeImmPats<RegisterClass RC, Instruction SLTiOp,
+ Instruction SLTiuOp> {
+ def : Pat<(setge RC:$lhs, immSExt16:$rhs),
+ (XORi (SLTiOp RC:$lhs, immSExt16:$rhs), 1)>;
+ def : Pat<(setuge RC:$lhs, immSExt16:$rhs),
+ (XORi (SLTiuOp RC:$lhs, immSExt16:$rhs), 1)>;
+}
+
+defm : SeteqPats<CPURegs, SLTiu, XOR, SLTu, ZERO>;
+defm : SetlePats<CPURegs, SLT, SLTu>;
+defm : SetgtPats<CPURegs, SLT, SLTu>;
+defm : SetgePats<CPURegs, SLT, SLTu>;
+defm : SetgeImmPats<CPURegs, SLTi, SLTiu>;
// select MipsDynAlloc
def : Pat<(MipsDynAlloc addr:$f), (DynAlloc addr:$f)>;
@@ -878,4 +1013,5 @@ def : Pat<(MipsDynAlloc addr:$f), (DynAlloc addr:$f)>;
//===----------------------------------------------------------------------===//
include "MipsInstrFPU.td"
+include "Mips64InstrInfo.td"
diff --git a/contrib/llvm/lib/Target/Mips/MipsJITInfo.cpp b/contrib/llvm/lib/Target/Mips/MipsJITInfo.cpp
new file mode 100644
index 0000000..28c2b48
--- /dev/null
+++ b/contrib/llvm/lib/Target/Mips/MipsJITInfo.cpp
@@ -0,0 +1,230 @@
+//===- MipsJITInfo.cpp - Implement the JIT interfaces for the Mips target -===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the JIT interfaces for the Mips target.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "jit"
+#include "MipsJITInfo.h"
+#include "MipsInstrInfo.h"
+#include "MipsRelocations.h"
+#include "MipsSubtarget.h"
+#include "llvm/Function.h"
+#include "llvm/CodeGen/JITCodeEmitter.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Support/Memory.h"
+#include <cstdlib>
+using namespace llvm;
+
+
+void MipsJITInfo::replaceMachineCodeForFunction(void *Old, void *New) {
+ report_fatal_error("MipsJITInfo::replaceMachineCodeForFunction");
+}
+
+/// JITCompilerFunction - This contains the address of the JIT function used to
+/// compile a function lazily.
+static TargetJITInfo::JITCompilerFn JITCompilerFunction;
+
+// Get the ASMPREFIX for the current host. This is often '_'.
+#ifndef __USER_LABEL_PREFIX__
+#define __USER_LABEL_PREFIX__
+#endif
+#define GETASMPREFIX2(X) #X
+#define GETASMPREFIX(X) GETASMPREFIX2(X)
+#define ASMPREFIX GETASMPREFIX(__USER_LABEL_PREFIX__)
+
+// CompilationCallback stub - We can't use a C function with inline assembly in
+// it, because the prolog/epilog inserted by GCC won't work for us. Instead,
+// write our own wrapper, which does things our way, so we have complete control
+// over register saving and restoring. This code saves registers, calls
+// MipsCompilationCallbackC and restores registers.
+extern "C" {
+#if defined (__mips__)
+void MipsCompilationCallback();
+
+ asm(
+ ".text\n"
+ ".align 2\n"
+ ".globl " ASMPREFIX "MipsCompilationCallback\n"
+ ASMPREFIX "MipsCompilationCallback:\n"
+ ".ent " ASMPREFIX "MipsCompilationCallback\n"
+ ".frame $29, 32, $31\n"
+ ".set noreorder\n"
+ ".cpload $t9\n"
+
+ "addiu $sp, $sp, -60\n"
+ ".cprestore 16\n"
+
+ // Save argument registers a0, a1, a2, a3, f12, f14 since they may contain
+ // stuff for the real target function right now. We have to act as if this
+ // whole compilation callback doesn't exist as far as the caller is
+ // concerned. We also need to save the ra register since it contains the
+ // original return address, and t8 register since it contains the address
+ // of the end of function stub.
+ "sw $a0, 20($sp)\n"
+ "sw $a1, 24($sp)\n"
+ "sw $a2, 28($sp)\n"
+ "sw $a3, 32($sp)\n"
+ "sw $ra, 36($sp)\n"
+ "sw $t8, 40($sp)\n"
+ "sdc1 $f12, 44($sp)\n"
+ "sdc1 $f14, 52($sp)\n"
+
+ // t8 points at the end of function stub. Pass the beginning of the stub
+ // to the MipsCompilationCallbackC.
+ "addiu $a0, $t8, -16\n"
+ "jal " ASMPREFIX "MipsCompilationCallbackC\n"
+ "nop\n"
+
+ // Restore registers.
+ "lw $a0, 20($sp)\n"
+ "lw $a1, 24($sp)\n"
+ "lw $a2, 28($sp)\n"
+ "lw $a3, 32($sp)\n"
+ "lw $ra, 36($sp)\n"
+ "lw $t8, 40($sp)\n"
+ "ldc1 $f12, 44($sp)\n"
+ "ldc1 $f14, 52($sp)\n"
+ "addiu $sp, $sp, 60\n"
+
+ // Jump to the (newly modified) stub to invoke the real function.
+ "addiu $t8, $t8, -16\n"
+ "jr $t8\n"
+ "nop\n"
+
+ ".set reorder\n"
+ ".end " ASMPREFIX "MipsCompilationCallback\n"
+ );
+#else // host != Mips
+ void MipsCompilationCallback() {
+ llvm_unreachable(
+ "Cannot call MipsCompilationCallback() on a non-Mips arch!");
+ }
+#endif
+}
+
+/// MipsCompilationCallbackC - This is the target-specific function invoked
+/// by the function stub when we did not know the real target of a call.
+/// This function must locate the start of the stub or call site and pass
+/// it into the JIT compiler function.
+extern "C" void MipsCompilationCallbackC(intptr_t StubAddr) {
+ // Get the address of the compiled code for this function.
+ intptr_t NewVal = (intptr_t) JITCompilerFunction((void*) StubAddr);
+
+ // Rewrite the function stub so that we don't end up here every time we
+ // execute the call. We're replacing the first four instructions of the
+ // stub with code that jumps to the compiled function:
+ // lui $t9, %hi(NewVal)
+ // addiu $t9, $t9, %lo(NewVal)
+ // jr $t9
+ // nop
+
+ int Hi = ((unsigned)NewVal & 0xffff0000) >> 16;
+ if ((NewVal & 0x8000) != 0)
+ Hi++;
+ int Lo = (int)(NewVal & 0xffff);
+
+ *(intptr_t *)(StubAddr) = 0xf << 26 | 25 << 16 | Hi;
+ *(intptr_t *)(StubAddr + 4) = 9 << 26 | 25 << 21 | 25 << 16 | Lo;
+ *(intptr_t *)(StubAddr + 8) = 25 << 21 | 8;
+ *(intptr_t *)(StubAddr + 12) = 0;
+
+ sys::Memory::InvalidateInstructionCache((void*) StubAddr, 16);
+}
+
+TargetJITInfo::LazyResolverFn MipsJITInfo::getLazyResolverFunction(
+ JITCompilerFn F) {
+ JITCompilerFunction = F;
+ return MipsCompilationCallback;
+}
+
+TargetJITInfo::StubLayout MipsJITInfo::getStubLayout() {
+ // The stub contains 4 4-byte instructions, aligned at 4 bytes. See
+ // emitFunctionStub for details.
+ StubLayout Result = { 4*4, 4 };
+ return Result;
+}
+
+void *MipsJITInfo::emitFunctionStub(const Function* F, void *Fn,
+ JITCodeEmitter &JCE) {
+ JCE.emitAlignment(4);
+ void *Addr = (void*) (JCE.getCurrentPCValue());
+ if (!sys::Memory::setRangeWritable(Addr, 16))
+ llvm_unreachable("ERROR: Unable to mark stub writable.");
+
+ intptr_t EmittedAddr;
+ if (Fn != (void*)(intptr_t)MipsCompilationCallback)
+ EmittedAddr = (intptr_t)Fn;
+ else
+ EmittedAddr = (intptr_t)MipsCompilationCallback;
+
+
+ int Hi = ((unsigned)EmittedAddr & 0xffff0000) >> 16;
+ if ((EmittedAddr & 0x8000) != 0)
+ Hi++;
+ int Lo = (int)(EmittedAddr & 0xffff);
+
+ // lui t9, %hi(EmittedAddr)
+ // addiu t9, t9, %lo(EmittedAddr)
+ // jalr t8, t9
+ // nop
+ JCE.emitWordLE(0xf << 26 | 25 << 16 | Hi);
+ JCE.emitWordLE(9 << 26 | 25 << 21 | 25 << 16 | Lo);
+ JCE.emitWordLE(25 << 21 | 24 << 11 | 9);
+ JCE.emitWordLE(0);
+
+ sys::Memory::InvalidateInstructionCache(Addr, 16);
+ if (!sys::Memory::setRangeExecutable(Addr, 16))
+ llvm_unreachable("ERROR: Unable to mark stub executable.");
+
+ return Addr;
+}
+
+/// relocate - Before the JIT can run a block of code that has been emitted,
+/// it must rewrite the code to contain the actual addresses of any
+/// referenced global symbols.
+void MipsJITInfo::relocate(void *Function, MachineRelocation *MR,
+ unsigned NumRelocs, unsigned char* GOTBase) {
+ for (unsigned i = 0; i != NumRelocs; ++i, ++MR) {
+
+ void *RelocPos = (char*) Function + MR->getMachineCodeOffset();
+ intptr_t ResultPtr = (intptr_t) MR->getResultPointer();
+
+ switch ((Mips::RelocationType) MR->getRelocationType()) {
+ case Mips::reloc_mips_branch:
+ ResultPtr = (((ResultPtr - (intptr_t) RelocPos) - 4) >> 2) & 0xffff;
+ *((unsigned*) RelocPos) |= (unsigned) ResultPtr;
+ break;
+
+ case Mips::reloc_mips_26:
+ ResultPtr = (ResultPtr & 0x0fffffff) >> 2;
+ *((unsigned*) RelocPos) |= (unsigned) ResultPtr;
+ break;
+
+ case Mips::reloc_mips_hi:
+ ResultPtr = ResultPtr >> 16;
+ if ((((intptr_t) (MR->getResultPointer()) & 0xffff) >> 15) == 1) {
+ ResultPtr += 1;
+ }
+ *((unsigned*) RelocPos) |= (unsigned) ResultPtr;
+ break;
+
+ case Mips::reloc_mips_lo:
+ ResultPtr = ResultPtr & 0xffff;
+ *((unsigned*) RelocPos) |= (unsigned) ResultPtr;
+ break;
+
+ default:
+ llvm_unreachable("ERROR: Unknown Mips relocation.");
+ }
+ }
+}
diff --git a/contrib/llvm/lib/Target/Mips/MipsJITInfo.h b/contrib/llvm/lib/Target/Mips/MipsJITInfo.h
new file mode 100644
index 0000000..41f32a3
--- /dev/null
+++ b/contrib/llvm/lib/Target/Mips/MipsJITInfo.h
@@ -0,0 +1,70 @@
+//===- MipsJITInfo.h - Mips implementation of the JIT interface -*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the declaration of the MipsJITInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef MIPSJITINFO_H
+#define MIPSJITINFO_H
+
+#include "MipsMachineFunction.h"
+#include "llvm/CodeGen/MachineConstantPool.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineJumpTableInfo.h"
+#include "llvm/Target/TargetJITInfo.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/SmallVector.h"
+
+namespace llvm {
+class MipsTargetMachine;
+
+class MipsJITInfo : public TargetJITInfo {
+
+ bool IsPIC;
+
+ public:
+ explicit MipsJITInfo() :
+ IsPIC(false) {}
+
+ /// replaceMachineCodeForFunction - Make it so that calling the function
+ /// whose machine code is at OLD turns into a call to NEW, perhaps by
+ /// overwriting OLD with a branch to NEW. This is used for self-modifying
+ /// code.
+ ///
+ virtual void replaceMachineCodeForFunction(void *Old, void *New);
+
+ // getStubLayout - Returns the size and alignment of the largest call stub
+ // on Mips.
+ virtual StubLayout getStubLayout();
+
+ /// emitFunctionStub - Use the specified JITCodeEmitter object to emit a
+ /// small native function that simply calls the function at the specified
+ /// address.
+ virtual void *emitFunctionStub(const Function* F, void *Fn,
+ JITCodeEmitter &JCE);
+
+ /// getLazyResolverFunction - Expose the lazy resolver to the JIT.
+ virtual LazyResolverFn getLazyResolverFunction(JITCompilerFn);
+
+ /// relocate - Before the JIT can run a block of code that has been emitted,
+ /// it must rewrite the code to contain the actual addresses of any
+ /// referenced global symbols.
+ virtual void relocate(void *Function, MachineRelocation *MR,
+ unsigned NumRelocs, unsigned char* GOTBase);
+
+ /// Initialize - Initialize internal stage for the function being JITted.
+ void Initialize(const MachineFunction &MF, bool isPIC) {
+ IsPIC = isPIC;
+ }
+
+};
+}
+
+#endif
diff --git a/contrib/llvm/lib/Target/Mips/MipsMCInstLower.cpp b/contrib/llvm/lib/Target/Mips/MipsMCInstLower.cpp
index f5cc3aa..608a7d2 100644
--- a/contrib/llvm/lib/Target/Mips/MipsMCInstLower.cpp
+++ b/contrib/llvm/lib/Target/Mips/MipsMCInstLower.cpp
@@ -29,10 +29,10 @@ MipsMCInstLower::MipsMCInstLower(Mangler *mang, const MachineFunction &mf,
: Ctx(mf.getContext()), Mang(mang), AsmPrinter(asmprinter) {}
MCOperand MipsMCInstLower::LowerSymbolOperand(const MachineOperand &MO,
- MachineOperandType MOTy) const {
+ MachineOperandType MOTy,
+ unsigned Offset) const {
MipsMCSymbolRefExpr::VariantKind Kind;
const MCSymbol *Symbol;
- int Offset = 0;
switch(MO.getTargetFlags()) {
default: assert(0 && "Invalid target flag!");
@@ -46,6 +46,11 @@ MCOperand MipsMCInstLower::LowerSymbolOperand(const MachineOperand &MO,
case MipsII::MO_GOTTPREL: Kind = MipsMCSymbolRefExpr::VK_Mips_GOTTPREL; break;
case MipsII::MO_TPREL_HI: Kind = MipsMCSymbolRefExpr::VK_Mips_TPREL_HI; break;
case MipsII::MO_TPREL_LO: Kind = MipsMCSymbolRefExpr::VK_Mips_TPREL_LO; break;
+ case MipsII::MO_GPOFF_HI: Kind = MipsMCSymbolRefExpr::VK_Mips_GPOFF_HI; break;
+ case MipsII::MO_GPOFF_LO: Kind = MipsMCSymbolRefExpr::VK_Mips_GPOFF_LO; break;
+ case MipsII::MO_GOT_DISP: Kind = MipsMCSymbolRefExpr::VK_Mips_GOT_DISP; break;
+ case MipsII::MO_GOT_PAGE: Kind = MipsMCSymbolRefExpr::VK_Mips_GOT_PAGE; break;
+ case MipsII::MO_GOT_OFST: Kind = MipsMCSymbolRefExpr::VK_Mips_GOT_OFST; break;
}
switch (MOTy) {
@@ -72,7 +77,7 @@ MCOperand MipsMCInstLower::LowerSymbolOperand(const MachineOperand &MO,
case MachineOperand::MO_ConstantPoolIndex:
Symbol = AsmPrinter.GetCPISymbol(MO.getIndex());
if (MO.getOffset())
- Offset = MO.getOffset();
+ Offset += MO.getOffset();
break;
default:
@@ -83,36 +88,39 @@ MCOperand MipsMCInstLower::LowerSymbolOperand(const MachineOperand &MO,
Ctx));
}
+MCOperand MipsMCInstLower::LowerOperand(const MachineOperand& MO) const {
+ MachineOperandType MOTy = MO.getType();
+
+ switch (MOTy) {
+ default:
+ assert(0 && "unknown operand type");
+ break;
+ case MachineOperand::MO_Register:
+ // Ignore all implicit register operands.
+ if (MO.isImplicit()) break;
+ return MCOperand::CreateReg(MO.getReg());
+ case MachineOperand::MO_Immediate:
+ return MCOperand::CreateImm(MO.getImm());
+ case MachineOperand::MO_MachineBasicBlock:
+ case MachineOperand::MO_GlobalAddress:
+ case MachineOperand::MO_ExternalSymbol:
+ case MachineOperand::MO_JumpTableIndex:
+ case MachineOperand::MO_ConstantPoolIndex:
+ case MachineOperand::MO_BlockAddress:
+ return LowerSymbolOperand(MO, MOTy, 0);
+ }
+
+ return MCOperand();
+}
+
void MipsMCInstLower::Lower(const MachineInstr *MI, MCInst &OutMI) const {
OutMI.setOpcode(MI->getOpcode());
for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
const MachineOperand &MO = MI->getOperand(i);
- MCOperand MCOp;
- MachineOperandType MOTy = MO.getType();
+ MCOperand MCOp = LowerOperand(MO);
- switch (MOTy) {
- default:
- MI->dump();
- llvm_unreachable("unknown operand type");
- case MachineOperand::MO_Register:
- // Ignore all implicit register operands.
- if (MO.isImplicit()) continue;
- MCOp = MCOperand::CreateReg(MO.getReg());
- break;
- case MachineOperand::MO_Immediate:
- MCOp = MCOperand::CreateImm(MO.getImm());
- break;
- case MachineOperand::MO_MachineBasicBlock:
- case MachineOperand::MO_GlobalAddress:
- case MachineOperand::MO_ExternalSymbol:
- case MachineOperand::MO_JumpTableIndex:
- case MachineOperand::MO_ConstantPoolIndex:
- case MachineOperand::MO_BlockAddress:
- MCOp = LowerSymbolOperand(MO, MOTy);
- break;
- }
-
- OutMI.addOperand(MCOp);
+ if (MCOp.isValid())
+ OutMI.addOperand(MCOp);
}
}
diff --git a/contrib/llvm/lib/Target/Mips/MipsMCInstLower.h b/contrib/llvm/lib/Target/Mips/MipsMCInstLower.h
index ec5201b..223f23a 100644
--- a/contrib/llvm/lib/Target/Mips/MipsMCInstLower.h
+++ b/contrib/llvm/lib/Target/Mips/MipsMCInstLower.h
@@ -1,4 +1,4 @@
-//===-- MipsMCInstLower.h - Lower MachineInstr to MCInst -------------------===//
+//===-- MipsMCInstLower.h - Lower MachineInstr to MCInst -------------------==//
//
// The LLVM Compiler Infrastructure
//
@@ -36,7 +36,8 @@ public:
void Lower(const MachineInstr *MI, MCInst &OutMI) const;
private:
MCOperand LowerSymbolOperand(const MachineOperand &MO,
- MachineOperandType MOTy) const;
+ MachineOperandType MOTy, unsigned Offset) const;
+ MCOperand LowerOperand(const MachineOperand& MO) const;
};
}
diff --git a/contrib/llvm/lib/Target/Mips/MipsMCSymbolRefExpr.cpp b/contrib/llvm/lib/Target/Mips/MipsMCSymbolRefExpr.cpp
index 9a2bdae..a0a242c 100644
--- a/contrib/llvm/lib/Target/Mips/MipsMCSymbolRefExpr.cpp
+++ b/contrib/llvm/lib/Target/Mips/MipsMCSymbolRefExpr.cpp
@@ -33,6 +33,11 @@ void MipsMCSymbolRefExpr::PrintImpl(raw_ostream &OS) const {
case VK_Mips_GOTTPREL: OS << "%gottprel("; break;
case VK_Mips_TPREL_HI: OS << "%tprel_hi("; break;
case VK_Mips_TPREL_LO: OS << "%tprel_lo("; break;
+ case VK_Mips_GPOFF_HI: OS << "%hi(%neg(%gp_rel("; break;
+ case VK_Mips_GPOFF_LO: OS << "%lo(%neg(%gp_rel("; break;
+ case VK_Mips_GOT_DISP: OS << "%got_disp("; break;
+ case VK_Mips_GOT_PAGE: OS << "%got_page("; break;
+ case VK_Mips_GOT_OFST: OS << "%got_ofst("; break;
}
OS << *Symbol;
@@ -43,7 +48,9 @@ void MipsMCSymbolRefExpr::PrintImpl(raw_ostream &OS) const {
OS << Offset;
}
- if (Kind != VK_Mips_None)
+ if (Kind == VK_Mips_GPOFF_HI || Kind == VK_Mips_GPOFF_LO)
+ OS << ")))";
+ else if (Kind != VK_Mips_None)
OS << ')';
}
diff --git a/contrib/llvm/lib/Target/Mips/MipsMCSymbolRefExpr.h b/contrib/llvm/lib/Target/Mips/MipsMCSymbolRefExpr.h
index 3e69596..55e85a7 100644
--- a/contrib/llvm/lib/Target/Mips/MipsMCSymbolRefExpr.h
+++ b/contrib/llvm/lib/Target/Mips/MipsMCSymbolRefExpr.h
@@ -25,7 +25,12 @@ public:
VK_Mips_TLSGD,
VK_Mips_GOTTPREL,
VK_Mips_TPREL_HI,
- VK_Mips_TPREL_LO
+ VK_Mips_TPREL_LO,
+ VK_Mips_GPOFF_HI,
+ VK_Mips_GPOFF_LO,
+ VK_Mips_GOT_DISP,
+ VK_Mips_GOT_PAGE,
+ VK_Mips_GOT_OFST
};
private:
diff --git a/contrib/llvm/lib/Target/Mips/MipsMachineFunction.h b/contrib/llvm/lib/Target/Mips/MipsMachineFunction.h
index dbb7a67..bc30b6b 100644
--- a/contrib/llvm/lib/Target/Mips/MipsMachineFunction.h
+++ b/contrib/llvm/lib/Target/Mips/MipsMachineFunction.h
@@ -51,16 +51,12 @@ private:
mutable int DynAllocFI; // Frame index of dynamically allocated stack area.
unsigned MaxCallFrameSize;
- /// AtomicFrameIndex - To implement atomic.swap and atomic.cmp.swap
- /// intrinsics, it is necessary to use a temporary stack location.
- /// This field holds the frame index of this location.
- int AtomicFrameIndex;
public:
MipsFunctionInfo(MachineFunction& MF)
: MF(MF), SRetReturnReg(0), GlobalBaseReg(0),
VarArgsFrameIndex(0), InArgFIRange(std::make_pair(-1, 0)),
OutArgFIRange(std::make_pair(-1, 0)), GPFI(0), DynAllocFI(0),
- MaxCallFrameSize(0), AtomicFrameIndex(-1)
+ MaxCallFrameSize(0)
{}
bool isInArgFI(int FI) const {
@@ -104,9 +100,6 @@ public:
unsigned getMaxCallFrameSize() const { return MaxCallFrameSize; }
void setMaxCallFrameSize(unsigned S) { MaxCallFrameSize = S; }
-
- int getAtomicFrameIndex() const { return AtomicFrameIndex; }
- void setAtomicFrameIndex(int Index) { AtomicFrameIndex = Index; }
};
} // end of namespace llvm
diff --git a/contrib/llvm/lib/Target/Mips/MipsRegisterInfo.cpp b/contrib/llvm/lib/Target/Mips/MipsRegisterInfo.cpp
index 24390da..f8c0fda 100644
--- a/contrib/llvm/lib/Target/Mips/MipsRegisterInfo.cpp
+++ b/contrib/llvm/lib/Target/Mips/MipsRegisterInfo.cpp
@@ -24,7 +24,6 @@
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
-#include "llvm/CodeGen/MachineLocation.h"
#include "llvm/Target/TargetFrameLowering.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetOptions.h"
@@ -44,7 +43,7 @@ using namespace llvm;
MipsRegisterInfo::MipsRegisterInfo(const MipsSubtarget &ST,
const TargetInstrInfo &tii)
- : MipsGenRegisterInfo(), Subtarget(ST), TII(tii) {}
+ : MipsGenRegisterInfo(Mips::RA), Subtarget(ST), TII(tii) {}
/// getRegisterNumbering - Given the enum value for some register, e.g.
/// Mips::RA, return the number that it corresponds to (e.g. 31).
@@ -52,39 +51,87 @@ unsigned MipsRegisterInfo::
getRegisterNumbering(unsigned RegEnum)
{
switch (RegEnum) {
- case Mips::ZERO : case Mips::F0 : case Mips::D0 : return 0;
- case Mips::AT : case Mips::F1 : return 1;
- case Mips::V0 : case Mips::F2 : case Mips::D1 : return 2;
- case Mips::V1 : case Mips::F3 : return 3;
- case Mips::A0 : case Mips::F4 : case Mips::D2 : return 4;
- case Mips::A1 : case Mips::F5 : return 5;
- case Mips::A2 : case Mips::F6 : case Mips::D3 : return 6;
- case Mips::A3 : case Mips::F7 : return 7;
- case Mips::T0 : case Mips::F8 : case Mips::D4 : return 8;
- case Mips::T1 : case Mips::F9 : return 9;
- case Mips::T2 : case Mips::F10: case Mips::D5: return 10;
- case Mips::T3 : case Mips::F11: return 11;
- case Mips::T4 : case Mips::F12: case Mips::D6: return 12;
- case Mips::T5 : case Mips::F13: return 13;
- case Mips::T6 : case Mips::F14: case Mips::D7: return 14;
- case Mips::T7 : case Mips::F15: return 15;
- case Mips::S0 : case Mips::F16: case Mips::D8: return 16;
- case Mips::S1 : case Mips::F17: return 17;
- case Mips::S2 : case Mips::F18: case Mips::D9: return 18;
- case Mips::S3 : case Mips::F19: return 19;
- case Mips::S4 : case Mips::F20: case Mips::D10: return 20;
- case Mips::S5 : case Mips::F21: return 21;
- case Mips::S6 : case Mips::F22: case Mips::D11: return 22;
- case Mips::S7 : case Mips::F23: return 23;
- case Mips::T8 : case Mips::F24: case Mips::D12: return 24;
- case Mips::T9 : case Mips::F25: return 25;
- case Mips::K0 : case Mips::F26: case Mips::D13: return 26;
- case Mips::K1 : case Mips::F27: return 27;
- case Mips::GP : case Mips::F28: case Mips::D14: return 28;
- case Mips::SP : case Mips::F29: return 29;
- case Mips::FP : case Mips::F30: case Mips::D15: return 30;
- case Mips::RA : case Mips::F31: return 31;
- default: llvm_unreachable("Unknown register number!");
+ case Mips::ZERO: case Mips::ZERO_64: case Mips::F0: case Mips::D0_64:
+ case Mips::D0:
+ return 0;
+ case Mips::AT: case Mips::AT_64: case Mips::F1: case Mips::D1_64:
+ return 1;
+ case Mips::V0: case Mips::V0_64: case Mips::F2: case Mips::D2_64:
+ case Mips::D1:
+ return 2;
+ case Mips::V1: case Mips::V1_64: case Mips::F3: case Mips::D3_64:
+ return 3;
+ case Mips::A0: case Mips::A0_64: case Mips::F4: case Mips::D4_64:
+ case Mips::D2:
+ return 4;
+ case Mips::A1: case Mips::A1_64: case Mips::F5: case Mips::D5_64:
+ return 5;
+ case Mips::A2: case Mips::A2_64: case Mips::F6: case Mips::D6_64:
+ case Mips::D3:
+ return 6;
+ case Mips::A3: case Mips::A3_64: case Mips::F7: case Mips::D7_64:
+ return 7;
+ case Mips::T0: case Mips::T0_64: case Mips::F8: case Mips::D8_64:
+ case Mips::D4:
+ return 8;
+ case Mips::T1: case Mips::T1_64: case Mips::F9: case Mips::D9_64:
+ return 9;
+ case Mips::T2: case Mips::T2_64: case Mips::F10: case Mips::D10_64:
+ case Mips::D5:
+ return 10;
+ case Mips::T3: case Mips::T3_64: case Mips::F11: case Mips::D11_64:
+ return 11;
+ case Mips::T4: case Mips::T4_64: case Mips::F12: case Mips::D12_64:
+ case Mips::D6:
+ return 12;
+ case Mips::T5: case Mips::T5_64: case Mips::F13: case Mips::D13_64:
+ return 13;
+ case Mips::T6: case Mips::T6_64: case Mips::F14: case Mips::D14_64:
+ case Mips::D7:
+ return 14;
+ case Mips::T7: case Mips::T7_64: case Mips::F15: case Mips::D15_64:
+ return 15;
+ case Mips::S0: case Mips::S0_64: case Mips::F16: case Mips::D16_64:
+ case Mips::D8:
+ return 16;
+ case Mips::S1: case Mips::S1_64: case Mips::F17: case Mips::D17_64:
+ return 17;
+ case Mips::S2: case Mips::S2_64: case Mips::F18: case Mips::D18_64:
+ case Mips::D9:
+ return 18;
+ case Mips::S3: case Mips::S3_64: case Mips::F19: case Mips::D19_64:
+ return 19;
+ case Mips::S4: case Mips::S4_64: case Mips::F20: case Mips::D20_64:
+ case Mips::D10:
+ return 20;
+ case Mips::S5: case Mips::S5_64: case Mips::F21: case Mips::D21_64:
+ return 21;
+ case Mips::S6: case Mips::S6_64: case Mips::F22: case Mips::D22_64:
+ case Mips::D11:
+ return 22;
+ case Mips::S7: case Mips::S7_64: case Mips::F23: case Mips::D23_64:
+ return 23;
+ case Mips::T8: case Mips::T8_64: case Mips::F24: case Mips::D24_64:
+ case Mips::D12:
+ return 24;
+ case Mips::T9: case Mips::T9_64: case Mips::F25: case Mips::D25_64:
+ return 25;
+ case Mips::K0: case Mips::K0_64: case Mips::F26: case Mips::D26_64:
+ case Mips::D13:
+ return 26;
+ case Mips::K1: case Mips::K1_64: case Mips::F27: case Mips::D27_64:
+ return 27;
+ case Mips::GP: case Mips::GP_64: case Mips::F28: case Mips::D28_64:
+ case Mips::D14:
+ return 28;
+ case Mips::SP: case Mips::SP_64: case Mips::F29: case Mips::D29_64:
+ return 29;
+ case Mips::FP: case Mips::FP_64: case Mips::F30: case Mips::D30_64:
+ case Mips::D15:
+ return 30;
+ case Mips::RA: case Mips::RA_64: case Mips::F31: case Mips::D31_64:
+ return 31;
+ default: llvm_unreachable("Unknown register number!");
}
return 0; // Not reached
}
@@ -101,7 +148,7 @@ getCalleeSavedRegs(const MachineFunction *MF) const
{
// Mips callee-save register range is $16-$23, $f20-$f30
static const unsigned SingleFloatOnlyCalleeSavedRegs[] = {
- Mips::F30, Mips::F29, Mips::F28, Mips::F27, Mips::F26,
+ Mips::F31, Mips::F30, Mips::F29, Mips::F28, Mips::F27, Mips::F26,
Mips::F25, Mips::F24, Mips::F23, Mips::F22, Mips::F21, Mips::F20,
Mips::RA, Mips::FP, Mips::S7, Mips::S6, Mips::S5, Mips::S4,
Mips::S3, Mips::S2, Mips::S1, Mips::S0, 0
@@ -113,31 +160,71 @@ getCalleeSavedRegs(const MachineFunction *MF) const
Mips::S3, Mips::S2, Mips::S1, Mips::S0, 0
};
+ static const unsigned N32CalleeSavedRegs[] = {
+ Mips::D31_64, Mips::D29_64, Mips::D27_64, Mips::D25_64, Mips::D23_64,
+ Mips::D21_64,
+ Mips::RA_64, Mips::FP_64, Mips::GP_64, Mips::S7_64, Mips::S6_64,
+ Mips::S5_64, Mips::S4_64, Mips::S3_64, Mips::S2_64, Mips::S1_64,
+ Mips::S0_64, 0
+ };
+
+ static const unsigned N64CalleeSavedRegs[] = {
+ Mips::D31_64, Mips::D30_64, Mips::D29_64, Mips::D28_64, Mips::D27_64,
+ Mips::D26_64, Mips::D25_64, Mips::D24_64,
+ Mips::RA_64, Mips::FP_64, Mips::GP_64, Mips::S7_64, Mips::S6_64,
+ Mips::S5_64, Mips::S4_64, Mips::S3_64, Mips::S2_64, Mips::S1_64,
+ Mips::S0_64, 0
+ };
+
if (Subtarget.isSingleFloat())
return SingleFloatOnlyCalleeSavedRegs;
- else
+ else if (!Subtarget.hasMips64())
return Mips32CalleeSavedRegs;
+ else if (Subtarget.isABI_N32())
+ return N32CalleeSavedRegs;
+
+ assert(Subtarget.isABI_N64());
+ return N64CalleeSavedRegs;
}
BitVector MipsRegisterInfo::
getReservedRegs(const MachineFunction &MF) const {
+ static const unsigned ReservedCPURegs[] = {
+ Mips::ZERO, Mips::AT, Mips::K0, Mips::K1,
+ Mips::GP, Mips::SP, Mips::FP, Mips::RA, 0
+ };
+
+ static const unsigned ReservedCPU64Regs[] = {
+ Mips::ZERO_64, Mips::AT_64, Mips::K0_64, Mips::K1_64,
+ Mips::GP_64, Mips::SP_64, Mips::FP_64, Mips::RA_64, 0
+ };
+
BitVector Reserved(getNumRegs());
- Reserved.set(Mips::ZERO);
- Reserved.set(Mips::AT);
- Reserved.set(Mips::K0);
- Reserved.set(Mips::K1);
- Reserved.set(Mips::GP);
- Reserved.set(Mips::SP);
- Reserved.set(Mips::FP);
- Reserved.set(Mips::RA);
- Reserved.set(Mips::F31);
- Reserved.set(Mips::D15);
-
- // SRV4 requires that odd register can't be used.
- if (!Subtarget.isSingleFloat() && !Subtarget.isMips32())
- for (unsigned FReg=(Mips::F0)+1; FReg < Mips::F30; FReg+=2)
- Reserved.set(FReg);
+ typedef TargetRegisterClass::iterator RegIter;
+
+ for (const unsigned *Reg = ReservedCPURegs; *Reg; ++Reg)
+ Reserved.set(*Reg);
+ if (Subtarget.hasMips64()) {
+ for (const unsigned *Reg = ReservedCPU64Regs; *Reg; ++Reg)
+ Reserved.set(*Reg);
+
+ // Reserve all registers in AFGR64.
+ for (RegIter Reg = Mips::AFGR64RegisterClass->begin();
+ Reg != Mips::AFGR64RegisterClass->end(); ++Reg)
+ Reserved.set(*Reg);
+ }
+ else {
+ // Reserve all registers in CPU64Regs & FGR64.
+ for (RegIter Reg = Mips::CPU64RegsRegisterClass->begin();
+ Reg != Mips::CPU64RegsRegisterClass->end(); ++Reg)
+ Reserved.set(*Reg);
+
+ for (RegIter Reg = Mips::FGR64RegisterClass->begin();
+ Reg != Mips::FGR64RegisterClass->end(); ++Reg)
+ Reserved.set(*Reg);
+ }
+
return Reserved;
}
@@ -245,11 +332,6 @@ eliminateFrameIndex(MachineBasicBlock::iterator II, int SPAdj,
}
unsigned MipsRegisterInfo::
-getRARegister() const {
- return Mips::RA;
-}
-
-unsigned MipsRegisterInfo::
getFrameRegister(const MachineFunction &MF) const {
const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering();
@@ -267,12 +349,3 @@ getEHHandlerRegister() const {
llvm_unreachable("What is the exception handler register");
return 0;
}
-
-int MipsRegisterInfo::
-getDwarfRegNum(unsigned RegNum, bool isEH) const {
- return MipsGenRegisterInfo::getDwarfRegNumFull(RegNum, 0);
-}
-
-int MipsRegisterInfo::getLLVMRegNum(unsigned DwarfRegNo, bool isEH) const {
- return MipsGenRegisterInfo::getLLVMRegNumFull(DwarfRegNo,0);
-}
diff --git a/contrib/llvm/lib/Target/Mips/MipsRegisterInfo.h b/contrib/llvm/lib/Target/Mips/MipsRegisterInfo.h
index 646369b..67e57dd 100644
--- a/contrib/llvm/lib/Target/Mips/MipsRegisterInfo.h
+++ b/contrib/llvm/lib/Target/Mips/MipsRegisterInfo.h
@@ -57,15 +57,11 @@ struct MipsRegisterInfo : public MipsGenRegisterInfo {
void processFunctionBeforeFrameFinalized(MachineFunction &MF) const;
/// Debug information queries.
- unsigned getRARegister() const;
unsigned getFrameRegister(const MachineFunction &MF) const;
/// Exception handling queries.
unsigned getEHExceptionRegister() const;
unsigned getEHHandlerRegister() const;
-
- int getDwarfRegNum(unsigned RegNum, bool isEH) const;
- int getLLVMRegNum(unsigned RegNum, bool isEH) const;
};
} // end namespace llvm
diff --git a/contrib/llvm/lib/Target/Mips/MipsRegisterInfo.td b/contrib/llvm/lib/Target/Mips/MipsRegisterInfo.td
index f0db518..925ad9e 100644
--- a/contrib/llvm/lib/Target/Mips/MipsRegisterInfo.td
+++ b/contrib/llvm/lib/Target/Mips/MipsRegisterInfo.td
@@ -10,6 +10,11 @@
//===----------------------------------------------------------------------===//
// Declarations that describe the MIPS register file
//===----------------------------------------------------------------------===//
+let Namespace = "Mips" in {
+def sub_fpeven : SubRegIndex;
+def sub_fpodd : SubRegIndex;
+def sub_32 : SubRegIndex;
+}
// We have banks of 32 registers each.
class MipsReg<string n> : Register<n> {
@@ -28,22 +33,31 @@ class MipsGPRReg<bits<5> num, string n> : MipsReg<n> {
let Num = num;
}
+// Mips 64-bit CPU Registers
+class Mips64GPRReg<bits<5> num, string n, list<Register> subregs>
+ : MipsRegWithSubRegs<n, subregs> {
+ let Num = num;
+ let SubRegIndices = [sub_32];
+}
+
// Mips 32-bit FPU Registers
class FPR<bits<5> num, string n> : MipsReg<n> {
let Num = num;
}
// Mips 64-bit (aliased) FPU Registers
-let Namespace = "Mips" in {
-def sub_fpeven : SubRegIndex;
-def sub_fpodd : SubRegIndex;
-}
class AFPR<bits<5> num, string n, list<Register> subregs>
: MipsRegWithSubRegs<n, subregs> {
let Num = num;
let SubRegIndices = [sub_fpeven, sub_fpodd];
}
+class AFPR64<bits<5> num, string n, list<Register> subregs>
+ : MipsRegWithSubRegs<n, subregs> {
+ let Num = num;
+ let SubRegIndices = [sub_32];
+}
+
// Mips Hardware Registers
class HWR<bits<5> num, string n> : MipsReg<n> {
let Num = num;
@@ -54,6 +68,7 @@ class HWR<bits<5> num, string n> : MipsReg<n> {
//===----------------------------------------------------------------------===//
let Namespace = "Mips" in {
+ // FIXME: Fix DwarfRegNum.
// General Purpose Registers
def ZERO : MipsGPRReg< 0, "ZERO">, DwarfRegNum<[0]>;
@@ -89,6 +104,40 @@ let Namespace = "Mips" in {
def FP : MipsGPRReg< 30, "FP">, DwarfRegNum<[30]>;
def RA : MipsGPRReg< 31, "RA">, DwarfRegNum<[31]>;
+ // General Purpose 64-bit Registers
+ def ZERO_64 : Mips64GPRReg< 0, "ZERO", [ZERO]>;
+ def AT_64 : Mips64GPRReg< 1, "AT", [AT]>;
+ def V0_64 : Mips64GPRReg< 2, "2", [V0]>;
+ def V1_64 : Mips64GPRReg< 3, "3", [V1]>;
+ def A0_64 : Mips64GPRReg< 4, "4", [A0]>;
+ def A1_64 : Mips64GPRReg< 5, "5", [A1]>;
+ def A2_64 : Mips64GPRReg< 6, "6", [A2]>;
+ def A3_64 : Mips64GPRReg< 7, "7", [A3]>;
+ def T0_64 : Mips64GPRReg< 8, "8", [T0]>;
+ def T1_64 : Mips64GPRReg< 9, "9", [T1]>;
+ def T2_64 : Mips64GPRReg< 10, "10", [T2]>;
+ def T3_64 : Mips64GPRReg< 11, "11", [T3]>;
+ def T4_64 : Mips64GPRReg< 12, "12", [T4]>;
+ def T5_64 : Mips64GPRReg< 13, "13", [T5]>;
+ def T6_64 : Mips64GPRReg< 14, "14", [T6]>;
+ def T7_64 : Mips64GPRReg< 15, "15", [T7]>;
+ def S0_64 : Mips64GPRReg< 16, "16", [S0]>;
+ def S1_64 : Mips64GPRReg< 17, "17", [S1]>;
+ def S2_64 : Mips64GPRReg< 18, "18", [S2]>;
+ def S3_64 : Mips64GPRReg< 19, "19", [S3]>;
+ def S4_64 : Mips64GPRReg< 20, "20", [S4]>;
+ def S5_64 : Mips64GPRReg< 21, "21", [S5]>;
+ def S6_64 : Mips64GPRReg< 22, "22", [S6]>;
+ def S7_64 : Mips64GPRReg< 23, "23", [S7]>;
+ def T8_64 : Mips64GPRReg< 24, "24", [T8]>;
+ def T9_64 : Mips64GPRReg< 25, "25", [T9]>;
+ def K0_64 : Mips64GPRReg< 26, "26", [K0]>;
+ def K1_64 : Mips64GPRReg< 27, "27", [K1]>;
+ def GP_64 : Mips64GPRReg< 28, "GP", [GP]>;
+ def SP_64 : Mips64GPRReg< 29, "SP", [SP]>;
+ def FP_64 : Mips64GPRReg< 30, "FP", [FP]>;
+ def RA_64 : Mips64GPRReg< 31, "RA", [RA]>;
+
/// Mips Single point precision FPU Registers
def F0 : FPR< 0, "F0">, DwarfRegNum<[32]>;
def F1 : FPR< 1, "F1">, DwarfRegNum<[33]>;
@@ -142,10 +191,49 @@ let Namespace = "Mips" in {
def D14 : AFPR<28, "F28", [F28, F29]>;
def D15 : AFPR<30, "F30", [F30, F31]>;
+ /// Mips Double point precision FPU Registers in MFP64 mode.
+ def D0_64 : AFPR64<0, "F0", [F0]>;
+ def D1_64 : AFPR64<1, "F1", [F1]>;
+ def D2_64 : AFPR64<2, "F2", [F2]>;
+ def D3_64 : AFPR64<3, "F3", [F3]>;
+ def D4_64 : AFPR64<4, "F4", [F4]>;
+ def D5_64 : AFPR64<5, "F5", [F5]>;
+ def D6_64 : AFPR64<6, "F6", [F6]>;
+ def D7_64 : AFPR64<7, "F7", [F7]>;
+ def D8_64 : AFPR64<8, "F8", [F8]>;
+ def D9_64 : AFPR64<9, "F9", [F9]>;
+ def D10_64 : AFPR64<10, "F10", [F10]>;
+ def D11_64 : AFPR64<11, "F11", [F11]>;
+ def D12_64 : AFPR64<12, "F12", [F12]>;
+ def D13_64 : AFPR64<13, "F13", [F13]>;
+ def D14_64 : AFPR64<14, "F14", [F14]>;
+ def D15_64 : AFPR64<15, "F15", [F15]>;
+ def D16_64 : AFPR64<16, "F16", [F16]>;
+ def D17_64 : AFPR64<17, "F17", [F17]>;
+ def D18_64 : AFPR64<18, "F18", [F18]>;
+ def D19_64 : AFPR64<19, "F19", [F19]>;
+ def D20_64 : AFPR64<20, "F20", [F20]>;
+ def D21_64 : AFPR64<21, "F21", [F21]>;
+ def D22_64 : AFPR64<22, "F22", [F22]>;
+ def D23_64 : AFPR64<23, "F23", [F23]>;
+ def D24_64 : AFPR64<24, "F24", [F24]>;
+ def D25_64 : AFPR64<25, "F25", [F25]>;
+ def D26_64 : AFPR64<26, "F26", [F26]>;
+ def D27_64 : AFPR64<27, "F27", [F27]>;
+ def D28_64 : AFPR64<28, "F28", [F28]>;
+ def D29_64 : AFPR64<29, "F29", [F29]>;
+ def D30_64 : AFPR64<30, "F30", [F30]>;
+ def D31_64 : AFPR64<31, "F31", [F31]>;
+
// Hi/Lo registers
def HI : Register<"hi">, DwarfRegNum<[64]>;
def LO : Register<"lo">, DwarfRegNum<[65]>;
+ let SubRegIndices = [sub_32] in {
+ def HI64 : RegisterWithSubRegs<"hi", [HI]>;
+ def LO64 : RegisterWithSubRegs<"lo", [LO]>;
+ }
+
// Status flags register
def FCR31 : Register<"31">;
@@ -167,6 +255,18 @@ def CPURegs : RegisterClass<"Mips", [i32], 32, (add
// Reserved
ZERO, AT, K0, K1, GP, SP, FP, RA)>;
+def CPU64Regs : RegisterClass<"Mips", [i64], 64, (add
+ // Return Values and Arguments
+ V0_64, V1_64, A0_64, A1_64, A2_64, A3_64,
+ // Not preserved across procedure calls
+ T0_64, T1_64, T2_64, T3_64, T4_64, T5_64, T6_64, T7_64, T8_64, T9_64,
+ // Callee save
+ S0_64, S1_64, S2_64, S3_64, S4_64, S5_64, S6_64, S7_64,
+ // Reserved
+ ZERO_64, AT_64, K0_64, K1_64, GP_64, SP_64, FP_64, RA_64)> {
+ let SubRegClasses = [(CPURegs sub_32)];
+}
+
// 64bit fp:
// * FGR64 - 32 64-bit registers
// * AFGR64 - 16 32-bit even registers (32-bit FP Mode)
@@ -182,17 +282,22 @@ def AFGR64 : RegisterClass<"Mips", [f64], 64, (add
// Not preserved across procedure calls
D2, D3, D4, D5, D8, D9,
// Callee save
- D10, D11, D12, D13, D14,
- // Reserved
- D15)> {
+ D10, D11, D12, D13, D14, D15)> {
let SubRegClasses = [(FGR32 sub_fpeven, sub_fpodd)];
}
+def FGR64 : RegisterClass<"Mips", [f64], 64, (sequence "D%u_64", 0, 31)> {
+ let SubRegClasses = [(FGR32 sub_32)];
+}
+
// Condition Register for floating point operations
def CCR : RegisterClass<"Mips", [i32], 32, (add FCR31)>;
// Hi/Lo Registers
def HILO : RegisterClass<"Mips", [i32], 32, (add HI, LO)>;
+def HILO64 : RegisterClass<"Mips", [i64], 64, (add HI64, LO64)> {
+ let SubRegClasses = [(HILO sub_32)];
+}
// Hardware registers
def HWRegs : RegisterClass<"Mips", [i32], 32, (add HWR29)>;
diff --git a/contrib/llvm/lib/Target/Mips/MipsRelocations.h b/contrib/llvm/lib/Target/Mips/MipsRelocations.h
new file mode 100644
index 0000000..66d1bfd
--- /dev/null
+++ b/contrib/llvm/lib/Target/Mips/MipsRelocations.h
@@ -0,0 +1,41 @@
+//===- MipsRelocations.h - Mips Code Relocations ---------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===---------------------------------------------------------------------===//
+//
+// This file defines the Mips target-specific relocation types
+// (for relocation-model=static).
+//
+//===---------------------------------------------------------------------===//
+
+#ifndef MIPSRELOCATIONS_H_
+#define MIPSRELOCATIONS_H_
+
+#include "llvm/CodeGen/MachineRelocation.h"
+
+namespace llvm {
+ namespace Mips{
+ enum RelocationType {
+ // reloc_mips_branch - pc relative relocation for branches. The lower 18
+ // bits of the difference between the branch target and the branch
+ // instruction, shifted right by 2.
+ reloc_mips_branch = 1,
+
+ // reloc_mips_hi - upper 16 bits of the address (modified by +1 if the
+ // lower 16 bits of the address is negative).
+ reloc_mips_hi = 2,
+
+ // reloc_mips_lo - lower 16 bits of the address.
+ reloc_mips_lo = 3,
+
+ // reloc_mips_26 - lower 28 bits of the address, shifted right by 2.
+ reloc_mips_26 = 4
+ };
+ }
+}
+
+#endif /* MIPSRELOCATIONS_H_ */
diff --git a/contrib/llvm/lib/Target/Mips/MipsSubtarget.cpp b/contrib/llvm/lib/Target/Mips/MipsSubtarget.cpp
index 6eee333..016d449 100644
--- a/contrib/llvm/lib/Target/Mips/MipsSubtarget.cpp
+++ b/contrib/llvm/lib/Target/Mips/MipsSubtarget.cpp
@@ -13,7 +13,7 @@
#include "MipsSubtarget.h"
#include "Mips.h"
-#include "llvm/Target/TargetRegistry.h"
+#include "llvm/Support/TargetRegistry.h"
#define GET_SUBTARGETINFO_TARGET_DESC
#define GET_SUBTARGETINFO_CTOR
@@ -24,15 +24,14 @@ using namespace llvm;
MipsSubtarget::MipsSubtarget(const std::string &TT, const std::string &CPU,
const std::string &FS, bool little) :
MipsGenSubtargetInfo(TT, CPU, FS),
- MipsArchVersion(Mips1), MipsABI(O32), IsLittle(little), IsSingleFloat(false),
- IsFP64bit(false), IsGP64bit(false), HasVFPU(false), IsLinux(true),
- HasSEInReg(false), HasCondMov(false), HasMulDivAdd(false), HasMinMax(false),
- HasSwap(false), HasBitCount(false)
+ MipsArchVersion(Mips32), MipsABI(UnknownABI), IsLittle(little),
+ IsSingleFloat(false), IsFP64bit(false), IsGP64bit(false), HasVFPU(false),
+ IsLinux(true), HasSEInReg(false), HasCondMov(false), HasMulDivAdd(false),
+ HasMinMax(false), HasSwap(false), HasBitCount(false)
{
std::string CPUName = CPU;
if (CPUName.empty())
- CPUName = "mips1";
- MipsArchVersion = Mips1;
+ CPUName = "mips32r1";
// Parse features string.
ParseSubtargetFeatures(CPUName, FS);
@@ -40,23 +39,16 @@ MipsSubtarget::MipsSubtarget(const std::string &TT, const std::string &CPU,
// Initialize scheduling itinerary for the specified CPU.
InstrItins = getInstrItineraryForCPU(CPUName);
+ // Set MipsABI if it hasn't been set yet.
+ if (MipsABI == UnknownABI)
+ MipsABI = hasMips64() ? N64 : O32;
+
+ // Check if Architecture and ABI are compatible.
+ assert(((!hasMips64() && (isABI_O32() || isABI_EABI())) ||
+ (hasMips64() && (isABI_N32() || isABI_N64()))) &&
+ "Invalid Arch & ABI pair.");
+
// Is the target system Linux ?
if (TT.find("linux") == std::string::npos)
IsLinux = false;
-
- // When only the target triple is specified and is
- // a allegrex target, set the features. We also match
- // big and little endian allegrex cores (dont really
- // know if a big one exists)
- if (TT.find("mipsallegrex") != std::string::npos ||
- TT.find("psp") != std::string::npos) {
- MipsABI = EABI;
- IsSingleFloat = true;
- MipsArchVersion = Mips2;
- HasVFPU = true; // Enables Allegrex Vector FPU (not supported yet)
- HasSEInReg = true;
- HasBitCount = true;
- HasSwap = true;
- HasCondMov = true;
- }
}
diff --git a/contrib/llvm/lib/Target/Mips/MipsSubtarget.h b/contrib/llvm/lib/Target/Mips/MipsSubtarget.h
index 533d4af..d9dddad 100644
--- a/contrib/llvm/lib/Target/Mips/MipsSubtarget.h
+++ b/contrib/llvm/lib/Target/Mips/MipsSubtarget.h
@@ -27,14 +27,15 @@ class StringRef;
class MipsSubtarget : public MipsGenSubtargetInfo {
public:
+ // NOTE: O64 will not be supported.
enum MipsABIEnum {
- O32, O64, N32, N64, EABI
+ UnknownABI, O32, N32, N64, EABI
};
protected:
enum MipsArchEnum {
- Mips1, Mips2, Mips3, Mips4, Mips32, Mips32r2
+ Mips32, Mips32r2, Mips64, Mips64r2
};
// Mips architecture version
@@ -90,6 +91,8 @@ public:
/// Only O32 and EABI supported right now.
bool isABI_EABI() const { return MipsABI == EABI; }
+ bool isABI_N64() const { return MipsABI == N64; }
+ bool isABI_N32() const { return MipsABI == N32; }
bool isABI_O32() const { return MipsABI == O32; }
unsigned getTargetABI() const { return MipsABI; }
@@ -102,9 +105,11 @@ public:
/// subtarget options. Definition of function is auto generated by tblgen.
void ParseSubtargetFeatures(StringRef CPU, StringRef FS);
- bool isMips1() const { return MipsArchVersion == Mips1; }
- bool isMips32() const { return MipsArchVersion >= Mips32; }
- bool isMips32r2() const { return MipsArchVersion == Mips32r2; }
+ bool hasMips32() const { return MipsArchVersion >= Mips32; }
+ bool hasMips32r2() const { return MipsArchVersion == Mips32r2 ||
+ MipsArchVersion == Mips64r2; }
+ bool hasMips64() const { return MipsArchVersion >= Mips64; }
+ bool hasMips64r2() const { return MipsArchVersion == Mips64r2; }
bool isLittle() const { return IsLittle; }
bool isFP64bit() const { return IsFP64bit; }
diff --git a/contrib/llvm/lib/Target/Mips/MipsTargetMachine.cpp b/contrib/llvm/lib/Target/Mips/MipsTargetMachine.cpp
index 20b9f4e..6480da3 100644
--- a/contrib/llvm/lib/Target/Mips/MipsTargetMachine.cpp
+++ b/contrib/llvm/lib/Target/Mips/MipsTargetMachine.cpp
@@ -14,13 +14,15 @@
#include "Mips.h"
#include "MipsTargetMachine.h"
#include "llvm/PassManager.h"
-#include "llvm/Target/TargetRegistry.h"
+#include "llvm/Support/TargetRegistry.h"
using namespace llvm;
extern "C" void LLVMInitializeMipsTarget() {
// Register the target.
- RegisterTargetMachine<MipsTargetMachine> X(TheMipsTarget);
+ RegisterTargetMachine<MipsebTargetMachine> X(TheMipsTarget);
RegisterTargetMachine<MipselTargetMachine> Y(TheMipselTarget);
+ RegisterTargetMachine<Mips64ebTargetMachine> A(TheMips64Target);
+ RegisterTargetMachine<Mips64elTargetMachine> B(TheMips64elTarget);
}
// DataLayout --> Big-endian, 32-bit pointer/ABI/alignment
@@ -31,30 +33,47 @@ extern "C" void LLVMInitializeMipsTarget() {
// an easier handling.
// Using CodeModel::Large enables different CALL behavior.
MipsTargetMachine::
-MipsTargetMachine(const Target &T, const std::string &TT,
- const std::string &CPU, const std::string &FS,
- bool isLittle=false):
- LLVMTargetMachine(T, TT, CPU, FS),
+MipsTargetMachine(const Target &T, StringRef TT,
+ StringRef CPU, StringRef FS,
+ Reloc::Model RM, CodeModel::Model CM,
+ bool isLittle):
+ LLVMTargetMachine(T, TT, CPU, FS, RM, CM),
Subtarget(TT, CPU, FS, isLittle),
- DataLayout(isLittle ?
- std::string("e-p:32:32:32-i8:8:32-i16:16:32-i64:64:64-n32") :
- std::string("E-p:32:32:32-i8:8:32-i16:16:32-i64:64:64-n32")),
+ DataLayout(isLittle ?
+ (Subtarget.isABI_N64() ?
+ "e-p:64:64:64-i8:8:32-i16:16:32-i64:64:64-f128:128:128-n32" :
+ "e-p:32:32:32-i8:8:32-i16:16:32-i64:64:64-n32") :
+ (Subtarget.isABI_N64() ?
+ "E-p:64:64:64-i8:8:32-i16:16:32-i64:64:64-f128:128:128-n32" :
+ "E-p:32:32:32-i8:8:32-i16:16:32-i64:64:64-n32")),
InstrInfo(*this),
FrameLowering(Subtarget),
- TLInfo(*this), TSInfo(*this) {
- // Abicall enables PIC by default
- if (getRelocationModel() == Reloc::Default) {
- if (Subtarget.isABI_O32())
- setRelocationModel(Reloc::PIC_);
- else
- setRelocationModel(Reloc::Static);
- }
+ TLInfo(*this), TSInfo(*this), JITInfo() {
}
+MipsebTargetMachine::
+MipsebTargetMachine(const Target &T, StringRef TT,
+ StringRef CPU, StringRef FS,
+ Reloc::Model RM, CodeModel::Model CM) :
+ MipsTargetMachine(T, TT, CPU, FS, RM, CM, false) {}
+
MipselTargetMachine::
-MipselTargetMachine(const Target &T, const std::string &TT,
- const std::string &CPU, const std::string &FS) :
- MipsTargetMachine(T, TT, CPU, FS, true) {}
+MipselTargetMachine(const Target &T, StringRef TT,
+ StringRef CPU, StringRef FS,
+ Reloc::Model RM, CodeModel::Model CM) :
+ MipsTargetMachine(T, TT, CPU, FS, RM, CM, true) {}
+
+Mips64ebTargetMachine::
+Mips64ebTargetMachine(const Target &T, StringRef TT,
+ StringRef CPU, StringRef FS,
+ Reloc::Model RM, CodeModel::Model CM) :
+ MipsTargetMachine(T, TT, CPU, FS, RM, CM, false) {}
+
+Mips64elTargetMachine::
+Mips64elTargetMachine(const Target &T, StringRef TT,
+ StringRef CPU, StringRef FS,
+ Reloc::Model RM, CodeModel::Model CM) :
+ MipsTargetMachine(T, TT, CPU, FS, RM, CM, true) {}
// Install an instruction selector pass using
// the ISelDag to gen Mips code.
@@ -77,7 +96,10 @@ addPreEmitPass(PassManagerBase &PM, CodeGenOpt::Level OptLevel)
bool MipsTargetMachine::
addPreRegAlloc(PassManagerBase &PM, CodeGenOpt::Level OptLevel) {
- PM.add(createMipsEmitGPRestorePass(*this));
+ // Do not restore $gp if target is Mips64.
+ // In N32/64, $gp is a callee-saved register.
+ if (!Subtarget.hasMips64())
+ PM.add(createMipsEmitGPRestorePass(*this));
return true;
}
@@ -86,3 +108,12 @@ addPostRegAlloc(PassManagerBase &PM, CodeGenOpt::Level OptLevel) {
PM.add(createMipsExpandPseudoPass(*this));
return true;
}
+
+bool MipsTargetMachine::addCodeEmitter(PassManagerBase &PM,
+ CodeGenOpt::Level OptLevel,
+ JITCodeEmitter &JCE) {
+ // Machine code emitter pass for Mips.
+ PM.add(createMipsJITCodeEmitterPass(*this, JCE));
+ return false;
+}
+
diff --git a/contrib/llvm/lib/Target/Mips/MipsTargetMachine.h b/contrib/llvm/lib/Target/Mips/MipsTargetMachine.h
index a021af2..118ed10 100644
--- a/contrib/llvm/lib/Target/Mips/MipsTargetMachine.h
+++ b/contrib/llvm/lib/Target/Mips/MipsTargetMachine.h
@@ -22,6 +22,7 @@
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetData.h"
#include "llvm/Target/TargetFrameLowering.h"
+#include "MipsJITInfo.h"
namespace llvm {
class formatted_raw_ostream;
@@ -33,9 +34,12 @@ namespace llvm {
MipsFrameLowering FrameLowering;
MipsTargetLowering TLInfo;
MipsSelectionDAGInfo TSInfo;
+ MipsJITInfo JITInfo;
+
public:
- MipsTargetMachine(const Target &T, const std::string &TT,
- const std::string &CPU, const std::string &FS,
+ MipsTargetMachine(const Target &T, StringRef TT,
+ StringRef CPU, StringRef FS,
+ Reloc::Model RM, CodeModel::Model CM,
bool isLittle);
virtual const MipsInstrInfo *getInstrInfo() const
@@ -46,6 +50,9 @@ namespace llvm {
{ return &Subtarget; }
virtual const TargetData *getTargetData() const
{ return &DataLayout;}
+ virtual MipsJITInfo *getJITInfo()
+ { return &JITInfo; }
+
virtual const MipsRegisterInfo *getRegisterInfo() const {
return &InstrInfo.getRegisterInfo();
@@ -67,16 +74,47 @@ namespace llvm {
virtual bool addPreRegAlloc(PassManagerBase &PM,
CodeGenOpt::Level OptLevel);
virtual bool addPostRegAlloc(PassManagerBase &, CodeGenOpt::Level);
+ virtual bool addCodeEmitter(PassManagerBase &PM,
+ CodeGenOpt::Level OptLevel,
+ JITCodeEmitter &JCE);
+
};
-/// MipselTargetMachine - Mipsel target machine.
+/// MipsebTargetMachine - Mips32 big endian target machine.
+///
+class MipsebTargetMachine : public MipsTargetMachine {
+public:
+ MipsebTargetMachine(const Target &T, StringRef TT,
+ StringRef CPU, StringRef FS,
+ Reloc::Model RM, CodeModel::Model CM);
+};
+
+/// MipselTargetMachine - Mips32 little endian target machine.
///
class MipselTargetMachine : public MipsTargetMachine {
public:
- MipselTargetMachine(const Target &T, const std::string &TT,
- const std::string &CPU, const std::string &FS);
+ MipselTargetMachine(const Target &T, StringRef TT,
+ StringRef CPU, StringRef FS,
+ Reloc::Model RM, CodeModel::Model CM);
};
+/// Mips64ebTargetMachine - Mips64 big endian target machine.
+///
+class Mips64ebTargetMachine : public MipsTargetMachine {
+public:
+ Mips64ebTargetMachine(const Target &T, StringRef TT,
+ StringRef CPU, StringRef FS,
+ Reloc::Model RM, CodeModel::Model CM);
+};
+
+/// Mips64elTargetMachine - Mips64 little endian target machine.
+///
+class Mips64elTargetMachine : public MipsTargetMachine {
+public:
+ Mips64elTargetMachine(const Target &T, StringRef TT,
+ StringRef CPU, StringRef FS,
+ Reloc::Model RM, CodeModel::Model CM);
+};
} // End llvm namespace
#endif
diff --git a/contrib/llvm/lib/Target/Mips/MipsTargetObjectFile.cpp b/contrib/llvm/lib/Target/Mips/MipsTargetObjectFile.cpp
index cf5d1b5..05c46f5 100644
--- a/contrib/llvm/lib/Target/Mips/MipsTargetObjectFile.cpp
+++ b/contrib/llvm/lib/Target/Mips/MipsTargetObjectFile.cpp
@@ -79,7 +79,7 @@ IsGlobalInSmallSection(const GlobalValue *GV, const TargetMachine &TM,
if (Kind.isMergeable1ByteCString())
return false;
- const Type *Ty = GV->getType()->getElementType();
+ Type *Ty = GV->getType()->getElementType();
return IsInSmallSection(TM.getTargetData()->getTypeAllocSize(Ty));
}
diff --git a/contrib/llvm/lib/Target/Mips/TargetInfo/MipsTargetInfo.cpp b/contrib/llvm/lib/Target/Mips/TargetInfo/MipsTargetInfo.cpp
index a8d6fe9..243632b 100644
--- a/contrib/llvm/lib/Target/Mips/TargetInfo/MipsTargetInfo.cpp
+++ b/contrib/llvm/lib/Target/Mips/TargetInfo/MipsTargetInfo.cpp
@@ -9,13 +9,23 @@
#include "Mips.h"
#include "llvm/Module.h"
-#include "llvm/Target/TargetRegistry.h"
+#include "llvm/Support/TargetRegistry.h"
using namespace llvm;
Target llvm::TheMipsTarget, llvm::TheMipselTarget;
+Target llvm::TheMips64Target, llvm::TheMips64elTarget;
extern "C" void LLVMInitializeMipsTargetInfo() {
- RegisterTarget<Triple::mips> X(TheMipsTarget, "mips", "Mips");
+ RegisterTarget<Triple::mips,
+ /*HasJIT=*/true> X(TheMipsTarget, "mips", "Mips");
- RegisterTarget<Triple::mipsel> Y(TheMipselTarget, "mipsel", "Mipsel");
+ RegisterTarget<Triple::mipsel,
+ /*HasJIT=*/true> Y(TheMipselTarget, "mipsel", "Mipsel");
+
+ RegisterTarget<Triple::mips64,
+ /*HasJIT=*/false> A(TheMips64Target, "mips64", "Mips64 [experimental]");
+
+ RegisterTarget<Triple::mips64el,
+ /*HasJIT=*/false> B(TheMips64elTarget,
+ "mips64el", "Mips64el [experimental]");
}
diff --git a/contrib/llvm/lib/Target/PTX/CMakeLists.txt b/contrib/llvm/lib/Target/PTX/CMakeLists.txt
deleted file mode 100644
index 331266d..0000000
--- a/contrib/llvm/lib/Target/PTX/CMakeLists.txt
+++ /dev/null
@@ -1,26 +0,0 @@
-set(LLVM_TARGET_DEFINITIONS PTX.td)
-
-tablegen(PTXGenAsmWriter.inc -gen-asm-writer)
-tablegen(PTXGenDAGISel.inc -gen-dag-isel)
-tablegen(PTXGenInstrInfo.inc -gen-instr-desc)
-tablegen(PTXGenInstrNames.inc -gen-instr-enums)
-tablegen(PTXGenRegisterInfo.inc -gen-register-desc)
-tablegen(PTXGenRegisterInfo.h.inc -gen-register-desc-header)
-tablegen(PTXGenRegisterNames.inc -gen-register-enums)
-tablegen(PTXGenSubtarget.inc -gen-subtarget)
-
-add_llvm_target(PTXCodeGen
- PTXAsmPrinter.cpp
- PTXISelDAGToDAG.cpp
- PTXISelLowering.cpp
- PTXInstrInfo.cpp
- PTXFrameLowering.cpp
- PTXMCAsmInfo.cpp
- PTXMCAsmStreamer.cpp
- PTXMFInfoExtract.cpp
- PTXRegisterInfo.cpp
- PTXSubtarget.cpp
- PTXTargetMachine.cpp
- )
-
-add_subdirectory(TargetInfo)
diff --git a/contrib/llvm/lib/Target/PTX/InstPrinter/PTXInstPrinter.cpp b/contrib/llvm/lib/Target/PTX/InstPrinter/PTXInstPrinter.cpp
new file mode 100644
index 0000000..aabb404
--- /dev/null
+++ b/contrib/llvm/lib/Target/PTX/InstPrinter/PTXInstPrinter.cpp
@@ -0,0 +1,192 @@
+//===-- PTXInstPrinter.cpp - Convert PTX MCInst to assembly syntax --------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This class prints a PTX MCInst to a .ptx file.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "asm-printer"
+#include "PTXInstPrinter.h"
+#include "MCTargetDesc/PTXBaseInfo.h"
+#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/MC/MCExpr.h"
+#include "llvm/MC/MCInst.h"
+#include "llvm/MC/MCSymbol.h"
+#include "llvm/ADT/StringExtras.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+using namespace llvm;
+
+#define GET_INSTRUCTION_NAME
+#include "PTXGenAsmWriter.inc"
+
+PTXInstPrinter::PTXInstPrinter(const MCAsmInfo &MAI,
+ const MCSubtargetInfo &STI) :
+ MCInstPrinter(MAI) {
+ // Initialize the set of available features.
+ setAvailableFeatures(STI.getFeatureBits());
+}
+
+StringRef PTXInstPrinter::getOpcodeName(unsigned Opcode) const {
+ return getInstructionName(Opcode);
+}
+
+void PTXInstPrinter::printRegName(raw_ostream &OS, unsigned RegNo) const {
+ OS << getRegisterName(RegNo);
+}
+
+void PTXInstPrinter::printInst(const MCInst *MI, raw_ostream &O,
+ StringRef Annot) {
+ printPredicate(MI, O);
+ switch (MI->getOpcode()) {
+ default:
+ printInstruction(MI, O);
+ break;
+ case PTX::CALL:
+ printCall(MI, O);
+ }
+ O << ";";
+ printAnnotation(O, Annot);
+}
+
+void PTXInstPrinter::printPredicate(const MCInst *MI, raw_ostream &O) {
+ // The last two operands are the predicate operands
+ int RegIndex;
+ int OpIndex;
+
+ if (MI->getOpcode() == PTX::CALL) {
+ RegIndex = 0;
+ OpIndex = 1;
+ } else {
+ RegIndex = MI->getNumOperands()-2;
+ OpIndex = MI->getNumOperands()-1;
+ }
+
+ int PredOp = MI->getOperand(OpIndex).getImm();
+ if (PredOp == PTXPredicate::None)
+ return;
+
+ if (PredOp == PTXPredicate::Negate)
+ O << '!';
+ else
+ O << '@';
+
+ printOperand(MI, RegIndex, O);
+}
+
+void PTXInstPrinter::printCall(const MCInst *MI, raw_ostream &O) {
+ O << "\tcall.uni\t";
+ // The first two operands are the predicate slot
+ unsigned Index = 2;
+ unsigned NumRets = MI->getOperand(Index++).getImm();
+
+ if (NumRets > 0) {
+ O << "(";
+ printOperand(MI, Index++, O);
+ for (unsigned i = 1; i < NumRets; ++i) {
+ O << ", ";
+ printOperand(MI, Index++, O);
+ }
+ O << "), ";
+ }
+
+ O << *(MI->getOperand(Index++).getExpr()) << ", (";
+
+ unsigned NumArgs = MI->getOperand(Index++).getImm();
+ if (NumArgs > 0) {
+ printOperand(MI, Index++, O);
+ for (unsigned i = 1; i < NumArgs; ++i) {
+ O << ", ";
+ printOperand(MI, Index++, O);
+ }
+ }
+ O << ")";
+}
+
+void PTXInstPrinter::printOperand(const MCInst *MI, unsigned OpNo,
+ raw_ostream &O) {
+ const MCOperand &Op = MI->getOperand(OpNo);
+ if (Op.isImm()) {
+ O << Op.getImm();
+ } else if (Op.isFPImm()) {
+ double Imm = Op.getFPImm();
+ APFloat FPImm(Imm);
+ APInt FPIntImm = FPImm.bitcastToAPInt();
+ O << "0D";
+ // PTX requires us to output the full 64 bits, even if the number is zero
+ if (FPIntImm.getZExtValue() > 0) {
+ O << FPIntImm.toString(16, false);
+ } else {
+ O << "0000000000000000";
+ }
+ } else {
+ assert(Op.isExpr() && "unknown operand kind in printOperand");
+ const MCExpr *Expr = Op.getExpr();
+ if (const MCSymbolRefExpr *SymRefExpr = dyn_cast<MCSymbolRefExpr>(Expr)) {
+ const MCSymbol &Sym = SymRefExpr->getSymbol();
+ O << Sym.getName();
+ } else {
+ O << *Op.getExpr();
+ }
+ }
+}
+
+void PTXInstPrinter::printMemOperand(const MCInst *MI, unsigned OpNo,
+ raw_ostream &O) {
+ // By definition, operand OpNo+1 is an i32imm
+ const MCOperand &Op2 = MI->getOperand(OpNo+1);
+ printOperand(MI, OpNo, O);
+ if (Op2.getImm() == 0)
+ return; // don't print "+0"
+ O << "+" << Op2.getImm();
+}
+
+void PTXInstPrinter::printRoundingMode(const MCInst *MI, unsigned OpNo,
+ raw_ostream &O) {
+ const MCOperand &Op = MI->getOperand(OpNo);
+ assert (Op.isImm() && "Rounding modes must be immediate values");
+ switch (Op.getImm()) {
+ default:
+ llvm_unreachable("Unknown rounding mode!");
+ case PTXRoundingMode::RndDefault:
+ llvm_unreachable("FP rounding-mode pass did not handle instruction!");
+ break;
+ case PTXRoundingMode::RndNone:
+ // Do not print anything.
+ break;
+ case PTXRoundingMode::RndNearestEven:
+ O << ".rn";
+ break;
+ case PTXRoundingMode::RndTowardsZero:
+ O << ".rz";
+ break;
+ case PTXRoundingMode::RndNegInf:
+ O << ".rm";
+ break;
+ case PTXRoundingMode::RndPosInf:
+ O << ".rp";
+ break;
+ case PTXRoundingMode::RndApprox:
+ O << ".approx";
+ break;
+ case PTXRoundingMode::RndNearestEvenInt:
+ O << ".rni";
+ break;
+ case PTXRoundingMode::RndTowardsZeroInt:
+ O << ".rzi";
+ break;
+ case PTXRoundingMode::RndNegInfInt:
+ O << ".rmi";
+ break;
+ case PTXRoundingMode::RndPosInfInt:
+ O << ".rpi";
+ break;
+ }
+}
+
diff --git a/contrib/llvm/lib/Target/PTX/InstPrinter/PTXInstPrinter.h b/contrib/llvm/lib/Target/PTX/InstPrinter/PTXInstPrinter.h
new file mode 100644
index 0000000..86dfd48
--- /dev/null
+++ b/contrib/llvm/lib/Target/PTX/InstPrinter/PTXInstPrinter.h
@@ -0,0 +1,47 @@
+//===-- PTXInstPrinter.h - Convert PTX MCInst to assembly syntax ----------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This class prints n PTX MCInst to a .ptx file.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef PTXINSTPRINTER_H
+#define PTXINSTPRINTER_H
+
+#include "llvm/MC/MCInstPrinter.h"
+#include "llvm/MC/MCSubtargetInfo.h"
+
+namespace llvm {
+
+class MCOperand;
+
+class PTXInstPrinter : public MCInstPrinter {
+public:
+ PTXInstPrinter(const MCAsmInfo &MAI, const MCSubtargetInfo &STI);
+
+ virtual void printInst(const MCInst *MI, raw_ostream &O, StringRef Annot);
+ virtual StringRef getOpcodeName(unsigned Opcode) const;
+ virtual void printRegName(raw_ostream &OS, unsigned RegNo) const;
+
+ static const char *getInstructionName(unsigned Opcode);
+
+ // Autogenerated by tblgen.
+ void printInstruction(const MCInst *MI, raw_ostream &O);
+ static const char *getRegisterName(unsigned RegNo);
+
+ void printPredicate(const MCInst *MI, raw_ostream &O);
+ void printCall(const MCInst *MI, raw_ostream &O);
+ void printOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O);
+ void printMemOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O);
+ void printRoundingMode(const MCInst *MI, unsigned OpNo, raw_ostream &O);
+};
+}
+
+#endif
+
diff --git a/contrib/llvm/lib/Target/PTX/MCTargetDesc/CMakeLists.txt b/contrib/llvm/lib/Target/PTX/MCTargetDesc/CMakeLists.txt
deleted file mode 100644
index df0f63f..0000000
--- a/contrib/llvm/lib/Target/PTX/MCTargetDesc/CMakeLists.txt
+++ /dev/null
@@ -1,4 +0,0 @@
-add_llvm_library(LLVMPTXDesc
- PTXMCTargetDesc.cpp
- PTXMCAsmInfo.cpp
- )
diff --git a/contrib/llvm/lib/Target/PTX/MCTargetDesc/Makefile b/contrib/llvm/lib/Target/PTX/MCTargetDesc/Makefile
deleted file mode 100644
index 35f5a7b..0000000
--- a/contrib/llvm/lib/Target/PTX/MCTargetDesc/Makefile
+++ /dev/null
@@ -1,16 +0,0 @@
-##===- lib/Target/PTX/TargetDesc/Makefile ------------------*- Makefile -*-===##
-#
-# The LLVM Compiler Infrastructure
-#
-# This file is distributed under the University of Illinois Open Source
-# License. See LICENSE.TXT for details.
-#
-##===----------------------------------------------------------------------===##
-
-LEVEL = ../../../..
-LIBRARYNAME = LLVMPTXDesc
-
-# Hack: we need to include 'main' target directory to grab private headers
-CPP.Flags += -I$(PROJ_OBJ_DIR)/.. -I$(PROJ_SRC_DIR)/..
-
-include $(LEVEL)/Makefile.common
diff --git a/contrib/llvm/lib/Target/PTX/MCTargetDesc/PTXBaseInfo.h b/contrib/llvm/lib/Target/PTX/MCTargetDesc/PTXBaseInfo.h
new file mode 100644
index 0000000..c6094be
--- /dev/null
+++ b/contrib/llvm/lib/Target/PTX/MCTargetDesc/PTXBaseInfo.h
@@ -0,0 +1,63 @@
+//===-- PTXBaseInfo.h - Top level definitions for PTX -------- --*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains small standalone helper functions and enum definitions for
+// the PTX target useful for the compiler back-end and the MC libraries.
+// As such, it deliberately does not include references to LLVM core
+// code gen types, passes, etc..
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef PTXBASEINFO_H
+#define PTXBASEINFO_H
+
+#include "PTXMCTargetDesc.h"
+
+namespace llvm {
+ namespace PTXStateSpace {
+ enum {
+ Global = 0, // default to global state space
+ Constant = 1,
+ Local = 2,
+ Parameter = 3,
+ Shared = 4
+ };
+ } // namespace PTXStateSpace
+
+ namespace PTXPredicate {
+ enum {
+ Normal = 0,
+ Negate = 1,
+ None = 2
+ };
+ } // namespace PTXPredicate
+
+ /// Namespace to hold all target-specific flags.
+ namespace PTXRoundingMode {
+ // Instruction Flags
+ enum {
+ // Rounding Mode Flags
+ RndMask = 15,
+ RndDefault = 0, // ---
+ RndNone = 1, // <NONE>
+ RndNearestEven = 2, // .rn
+ RndTowardsZero = 3, // .rz
+ RndNegInf = 4, // .rm
+ RndPosInf = 5, // .rp
+ RndApprox = 6, // .approx
+ RndNearestEvenInt = 7, // .rni
+ RndTowardsZeroInt = 8, // .rzi
+ RndNegInfInt = 9, // .rmi
+ RndPosInfInt = 10 // .rpi
+ };
+ } // namespace PTXII
+} // namespace llvm
+
+#endif
+
diff --git a/contrib/llvm/lib/Target/PTX/MCTargetDesc/PTXMCTargetDesc.cpp b/contrib/llvm/lib/Target/PTX/MCTargetDesc/PTXMCTargetDesc.cpp
index 23f70bd..a5af3b8 100644
--- a/contrib/llvm/lib/Target/PTX/MCTargetDesc/PTXMCTargetDesc.cpp
+++ b/contrib/llvm/lib/Target/PTX/MCTargetDesc/PTXMCTargetDesc.cpp
@@ -13,10 +13,12 @@
#include "PTXMCTargetDesc.h"
#include "PTXMCAsmInfo.h"
+#include "InstPrinter/PTXInstPrinter.h"
+#include "llvm/MC/MCCodeGenInfo.h"
#include "llvm/MC/MCInstrInfo.h"
#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/MC/MCSubtargetInfo.h"
-#include "llvm/Target/TargetRegistry.h"
+#include "llvm/Support/TargetRegistry.h"
#define GET_INSTRINFO_MC_DESC
#include "PTXGenInstrInfo.inc"
@@ -35,9 +37,11 @@ static MCInstrInfo *createPTXMCInstrInfo() {
return X;
}
-extern "C" void LLVMInitializePTXMCInstrInfo() {
- TargetRegistry::RegisterMCInstrInfo(ThePTX32Target, createPTXMCInstrInfo);
- TargetRegistry::RegisterMCInstrInfo(ThePTX64Target, createPTXMCInstrInfo);
+static MCRegisterInfo *createPTXMCRegisterInfo(StringRef TT) {
+ MCRegisterInfo *X = new MCRegisterInfo();
+ // PTX does not have a return address register.
+ InitPTXMCRegisterInfo(X, 0);
+ return X;
}
static MCSubtargetInfo *createPTXMCSubtargetInfo(StringRef TT, StringRef CPU,
@@ -47,14 +51,45 @@ static MCSubtargetInfo *createPTXMCSubtargetInfo(StringRef TT, StringRef CPU,
return X;
}
-extern "C" void LLVMInitializePTXMCSubtargetInfo() {
+static MCCodeGenInfo *createPTXMCCodeGenInfo(StringRef TT, Reloc::Model RM,
+ CodeModel::Model CM) {
+ MCCodeGenInfo *X = new MCCodeGenInfo();
+ X->InitMCCodeGenInfo(RM, CM);
+ return X;
+}
+
+static MCInstPrinter *createPTXMCInstPrinter(const Target &T,
+ unsigned SyntaxVariant,
+ const MCAsmInfo &MAI,
+ const MCSubtargetInfo &STI) {
+ assert(SyntaxVariant == 0 && "We only have one syntax variant");
+ return new PTXInstPrinter(MAI, STI);
+}
+
+extern "C" void LLVMInitializePTXTargetMC() {
+ // Register the MC asm info.
+ RegisterMCAsmInfo<PTXMCAsmInfo> X(ThePTX32Target);
+ RegisterMCAsmInfo<PTXMCAsmInfo> Y(ThePTX64Target);
+
+ // Register the MC codegen info.
+ TargetRegistry::RegisterMCCodeGenInfo(ThePTX32Target, createPTXMCCodeGenInfo);
+ TargetRegistry::RegisterMCCodeGenInfo(ThePTX64Target, createPTXMCCodeGenInfo);
+
+ // Register the MC instruction info.
+ TargetRegistry::RegisterMCInstrInfo(ThePTX32Target, createPTXMCInstrInfo);
+ TargetRegistry::RegisterMCInstrInfo(ThePTX64Target, createPTXMCInstrInfo);
+
+ // Register the MC register info.
+ TargetRegistry::RegisterMCRegInfo(ThePTX32Target, createPTXMCRegisterInfo);
+ TargetRegistry::RegisterMCRegInfo(ThePTX64Target, createPTXMCRegisterInfo);
+
+ // Register the MC subtarget info.
TargetRegistry::RegisterMCSubtargetInfo(ThePTX32Target,
createPTXMCSubtargetInfo);
TargetRegistry::RegisterMCSubtargetInfo(ThePTX64Target,
createPTXMCSubtargetInfo);
-}
-extern "C" void LLVMInitializePTXMCAsmInfo() {
- RegisterMCAsmInfo<PTXMCAsmInfo> X(ThePTX32Target);
- RegisterMCAsmInfo<PTXMCAsmInfo> Y(ThePTX64Target);
+ // Register the MCInstPrinter.
+ TargetRegistry::RegisterMCInstPrinter(ThePTX32Target, createPTXMCInstPrinter);
+ TargetRegistry::RegisterMCInstPrinter(ThePTX64Target, createPTXMCInstPrinter);
}
diff --git a/contrib/llvm/lib/Target/PTX/PTX.h b/contrib/llvm/lib/Target/PTX/PTX.h
index 28cab24..7d46cce 100644
--- a/contrib/llvm/lib/Target/PTX/PTX.h
+++ b/contrib/llvm/lib/Target/PTX/PTX.h
@@ -15,34 +15,30 @@
#ifndef PTX_H
#define PTX_H
-#include "MCTargetDesc/PTXMCTargetDesc.h"
+#include "MCTargetDesc/PTXBaseInfo.h"
#include "llvm/Target/TargetMachine.h"
namespace llvm {
+ class MachineInstr;
+ class MCInst;
+ class PTXAsmPrinter;
class PTXTargetMachine;
class FunctionPass;
- namespace PTX {
- enum StateSpace {
- GLOBAL = 0, // default to global state space
- CONSTANT = 1,
- LOCAL = 2,
- PARAMETER = 3,
- SHARED = 4
- };
-
- enum Predicate {
- PRED_NORMAL = 0,
- PRED_NEGATE = 1
- };
- } // namespace PTX
-
FunctionPass *createPTXISelDag(PTXTargetMachine &TM,
CodeGenOpt::Level OptLevel);
FunctionPass *createPTXMFInfoExtract(PTXTargetMachine &TM,
CodeGenOpt::Level OptLevel);
+ FunctionPass *createPTXFPRoundingModePass(PTXTargetMachine &TM,
+ CodeGenOpt::Level OptLevel);
+
+ FunctionPass *createPTXRegisterAllocator();
+
+ void LowerPTXMachineInstrToMCInst(const MachineInstr *MI, MCInst &OutMI,
+ PTXAsmPrinter &AP);
+
} // namespace llvm;
#endif // PTX_H
diff --git a/contrib/llvm/lib/Target/PTX/PTX.td b/contrib/llvm/lib/Target/PTX/PTX.td
index f6fbe9f..693bb9c 100644
--- a/contrib/llvm/lib/Target/PTX/PTX.td
+++ b/contrib/llvm/lib/Target/PTX/PTX.td
@@ -52,13 +52,13 @@ def FeatureSM12 : SubtargetFeature<"sm12", "PTXTarget", "PTX_SM_1_2",
def FeatureSM13 : SubtargetFeature<"sm13", "PTXTarget", "PTX_SM_1_3",
"Use Shader Model 1.3">;
def FeatureSM20 : SubtargetFeature<"sm20", "PTXTarget", "PTX_SM_2_0",
- "Use Shader Model 2.0">;
+ "Use Shader Model 2.0", [FeatureDouble]>;
def FeatureSM21 : SubtargetFeature<"sm21", "PTXTarget", "PTX_SM_2_1",
- "Use Shader Model 2.1">;
+ "Use Shader Model 2.1", [FeatureDouble]>;
def FeatureSM22 : SubtargetFeature<"sm22", "PTXTarget", "PTX_SM_2_2",
- "Use Shader Model 2.2">;
+ "Use Shader Model 2.2", [FeatureDouble]>;
def FeatureSM23 : SubtargetFeature<"sm23", "PTXTarget", "PTX_SM_2_3",
- "Use Shader Model 2.3">;
+ "Use Shader Model 2.3", [FeatureDouble]>;
def FeatureCOMPUTE10 : SubtargetFeature<"compute10", "PTXTarget",
"PTX_COMPUTE_1_0",
@@ -74,7 +74,8 @@ def FeatureCOMPUTE13 : SubtargetFeature<"compute13", "PTXTarget",
"Use Compute Compatibility 1.3">;
def FeatureCOMPUTE20 : SubtargetFeature<"compute20", "PTXTarget",
"PTX_COMPUTE_2_0",
- "Use Compute Compatibility 2.0">;
+ "Use Compute Compatibility 2.0",
+ [FeatureDouble]>;
//===----------------------------------------------------------------------===//
// PTX supported processors
@@ -113,12 +114,6 @@ def : Proc<"fermi", [FeatureSM20, FeatureDouble]>;
include "PTXRegisterInfo.td"
//===----------------------------------------------------------------------===//
-// Calling Conventions
-//===----------------------------------------------------------------------===//
-
-include "PTXCallingConv.td"
-
-//===----------------------------------------------------------------------===//
// Instruction Descriptions
//===----------------------------------------------------------------------===//
@@ -127,9 +122,20 @@ include "PTXInstrInfo.td"
def PTXInstrInfo : InstrInfo;
//===----------------------------------------------------------------------===//
+// Assembly printer
+//===----------------------------------------------------------------------===//
+// PTX uses the MC printer for asm output, so make sure the TableGen
+// AsmWriter bits get associated with the correct class.
+def PTXAsmWriter : AsmWriter {
+ string AsmWriterClassName = "InstPrinter";
+ bit isMCAsmWriter = 1;
+}
+
+//===----------------------------------------------------------------------===//
// Target Declaration
//===----------------------------------------------------------------------===//
def PTX : Target {
let InstructionSet = PTXInstrInfo;
+ let AssemblyWriters = [PTXAsmWriter];
}
diff --git a/contrib/llvm/lib/Target/PTX/PTXAsmPrinter.cpp b/contrib/llvm/lib/Target/PTX/PTXAsmPrinter.cpp
index 2848d54..733744b 100644
--- a/contrib/llvm/lib/Target/PTX/PTXAsmPrinter.cpp
+++ b/contrib/llvm/lib/Target/PTX/PTXAsmPrinter.cpp
@@ -15,9 +15,14 @@
#define DEBUG_TYPE "ptx-asm-printer"
#include "PTX.h"
+#include "PTXAsmPrinter.h"
#include "PTXMachineFunctionInfo.h"
+#include "PTXParamManager.h"
+#include "PTXRegisterInfo.h"
#include "PTXTargetMachine.h"
+#include "llvm/Argument.h"
#include "llvm/DerivedTypes.h"
+#include "llvm/Function.h"
#include "llvm/Module.h"
#include "llvm/ADT/SmallString.h"
#include "llvm/ADT/StringExtras.h"
@@ -28,69 +33,32 @@
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCExpr.h"
+#include "llvm/MC/MCInst.h"
#include "llvm/MC/MCStreamer.h"
#include "llvm/MC/MCSymbol.h"
#include "llvm/Target/Mangler.h"
#include "llvm/Target/TargetLoweringObjectFile.h"
-#include "llvm/Target/TargetRegistry.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/Path.h"
+#include "llvm/Support/TargetRegistry.h"
#include "llvm/Support/raw_ostream.h"
using namespace llvm;
-namespace {
-class PTXAsmPrinter : public AsmPrinter {
-public:
- explicit PTXAsmPrinter(TargetMachine &TM, MCStreamer &Streamer)
- : AsmPrinter(TM, Streamer) {}
-
- const char *getPassName() const { return "PTX Assembly Printer"; }
-
- bool doFinalization(Module &M);
-
- virtual void EmitStartOfAsmFile(Module &M);
-
- virtual bool runOnMachineFunction(MachineFunction &MF);
-
- virtual void EmitFunctionBodyStart();
- virtual void EmitFunctionBodyEnd() { OutStreamer.EmitRawText(Twine("}")); }
-
- virtual void EmitInstruction(const MachineInstr *MI);
-
- void printOperand(const MachineInstr *MI, int opNum, raw_ostream &OS);
- void printMemOperand(const MachineInstr *MI, int opNum, raw_ostream &OS,
- const char *Modifier = 0);
- void printParamOperand(const MachineInstr *MI, int opNum, raw_ostream &OS,
- const char *Modifier = 0);
- void printReturnOperand(const MachineInstr *MI, int opNum, raw_ostream &OS,
- const char *Modifier = 0);
- void printPredicateOperand(const MachineInstr *MI, raw_ostream &O);
-
- unsigned GetOrCreateSourceID(StringRef FileName,
- StringRef DirName);
-
- // autogen'd.
- void printInstruction(const MachineInstr *MI, raw_ostream &OS);
- static const char *getRegisterName(unsigned RegNo);
-
-private:
- void EmitVariableDeclaration(const GlobalVariable *gv);
- void EmitFunctionDeclaration();
-
- StringMap<unsigned> SourceIdMap;
-}; // class PTXAsmPrinter
-} // namespace
-
static const char PARAM_PREFIX[] = "__param_";
static const char RETURN_PREFIX[] = "__ret_";
-static const char *getRegisterTypeName(unsigned RegNo) {
-#define TEST_REGCLS(cls, clsstr) \
- if (PTX::cls ## RegisterClass->contains(RegNo)) return # clsstr;
+static const char *getRegisterTypeName(unsigned RegNo,
+ const MachineRegisterInfo& MRI) {
+ const TargetRegisterClass *TRC = MRI.getRegClass(RegNo);
+
+#define TEST_REGCLS(cls, clsstr) \
+ if (PTX::cls ## RegisterClass == TRC) return # clsstr;
+
TEST_REGCLS(RegPred, pred);
TEST_REGCLS(RegI16, b16);
TEST_REGCLS(RegI32, b32);
@@ -106,16 +74,16 @@ static const char *getRegisterTypeName(unsigned RegNo) {
static const char *getStateSpaceName(unsigned addressSpace) {
switch (addressSpace) {
default: llvm_unreachable("Unknown state space");
- case PTX::GLOBAL: return "global";
- case PTX::CONSTANT: return "const";
- case PTX::LOCAL: return "local";
- case PTX::PARAMETER: return "param";
- case PTX::SHARED: return "shared";
+ case PTXStateSpace::Global: return "global";
+ case PTXStateSpace::Constant: return "const";
+ case PTXStateSpace::Local: return "local";
+ case PTXStateSpace::Parameter: return "param";
+ case PTXStateSpace::Shared: return "shared";
}
return NULL;
}
-static const char *getTypeName(const Type* type) {
+static const char *getTypeName(Type* type) {
while (true) {
switch (type->getTypeID()) {
default: llvm_unreachable("Unknown type");
@@ -130,7 +98,7 @@ static const char *getTypeName(const Type* type) {
}
case Type::ArrayTyID:
case Type::PointerTyID:
- type = dyn_cast<const SequentialType>(type)->getElementType();
+ type = dyn_cast<SequentialType>(type)->getElementType();
break;
}
}
@@ -170,6 +138,7 @@ void PTXAsmPrinter::EmitStartOfAsmFile(Module &M)
{
const PTXSubtarget& ST = TM.getSubtarget<PTXSubtarget>();
+ // Emit the PTX .version and .target attributes
OutStreamer.EmitRawText(Twine("\t.version " + ST.getPTXVersionString()));
OutStreamer.EmitRawText(Twine("\t.target " + ST.getTargetString() +
(ST.supportsDouble() ? ""
@@ -203,177 +172,118 @@ void PTXAsmPrinter::EmitStartOfAsmFile(Module &M)
EmitVariableDeclaration(i);
}
-bool PTXAsmPrinter::runOnMachineFunction(MachineFunction &MF) {
- SetupMachineFunction(MF);
- EmitFunctionDeclaration();
- EmitFunctionBody();
- return false;
-}
-
void PTXAsmPrinter::EmitFunctionBodyStart() {
OutStreamer.EmitRawText(Twine("{"));
const PTXMachineFunctionInfo *MFI = MF->getInfo<PTXMachineFunctionInfo>();
+ const PTXParamManager &PM = MFI->getParamManager();
+
+ // Print register definitions
+ std::string regDefs;
+ unsigned numRegs;
+
+ // pred
+ numRegs = MFI->getNumRegistersForClass(PTX::RegPredRegisterClass);
+ if(numRegs > 0) {
+ regDefs += "\t.reg .pred %p<";
+ regDefs += utostr(numRegs);
+ regDefs += ">;\n";
+ }
+
+ // i16
+ numRegs = MFI->getNumRegistersForClass(PTX::RegI16RegisterClass);
+ if(numRegs > 0) {
+ regDefs += "\t.reg .b16 %rh<";
+ regDefs += utostr(numRegs);
+ regDefs += ">;\n";
+ }
+
+ // i32
+ numRegs = MFI->getNumRegistersForClass(PTX::RegI32RegisterClass);
+ if(numRegs > 0) {
+ regDefs += "\t.reg .b32 %r<";
+ regDefs += utostr(numRegs);
+ regDefs += ">;\n";
+ }
+
+ // i64
+ numRegs = MFI->getNumRegistersForClass(PTX::RegI64RegisterClass);
+ if(numRegs > 0) {
+ regDefs += "\t.reg .b64 %rd<";
+ regDefs += utostr(numRegs);
+ regDefs += ">;\n";
+ }
+
+ // f32
+ numRegs = MFI->getNumRegistersForClass(PTX::RegF32RegisterClass);
+ if(numRegs > 0) {
+ regDefs += "\t.reg .f32 %f<";
+ regDefs += utostr(numRegs);
+ regDefs += ">;\n";
+ }
+
+ // f64
+ numRegs = MFI->getNumRegistersForClass(PTX::RegF64RegisterClass);
+ if(numRegs > 0) {
+ regDefs += "\t.reg .f64 %fd<";
+ regDefs += utostr(numRegs);
+ regDefs += ">;\n";
+ }
- // Print local variable definition
- for (PTXMachineFunctionInfo::reg_iterator
- i = MFI->localVarRegBegin(), e = MFI->localVarRegEnd(); i != e; ++ i) {
- unsigned reg = *i;
-
- std::string def = "\t.reg .";
- def += getRegisterTypeName(reg);
- def += ' ';
- def += getRegisterName(reg);
- def += ';';
- OutStreamer.EmitRawText(Twine(def));
+ // Local params
+ for (PTXParamManager::param_iterator i = PM.local_begin(), e = PM.local_end();
+ i != e; ++i) {
+ regDefs += "\t.param .b";
+ regDefs += utostr(PM.getParamSize(*i));
+ regDefs += " ";
+ regDefs += PM.getParamName(*i);
+ regDefs += ";\n";
}
+ OutStreamer.EmitRawText(Twine(regDefs));
+
+
const MachineFrameInfo* FrameInfo = MF->getFrameInfo();
DEBUG(dbgs() << "Have " << FrameInfo->getNumObjects()
<< " frame object(s)\n");
for (unsigned i = 0, e = FrameInfo->getNumObjects(); i != e; ++i) {
DEBUG(dbgs() << "Size of object: " << FrameInfo->getObjectSize(i) << "\n");
if (FrameInfo->getObjectSize(i) > 0) {
- std::string def = "\t.reg .b";
- def += utostr(FrameInfo->getObjectSize(i)*8); // Convert to bits
- def += " s";
+ std::string def = "\t.local .align ";
+ def += utostr(FrameInfo->getObjectAlignment(i));
+ def += " .b8";
+ def += " __local";
def += utostr(i);
+ def += "[";
+ def += utostr(FrameInfo->getObjectSize(i)); // Convert to bits
+ def += "]";
def += ";";
OutStreamer.EmitRawText(Twine(def));
}
}
-}
-
-void PTXAsmPrinter::EmitInstruction(const MachineInstr *MI) {
- std::string str;
- str.reserve(64);
-
- raw_string_ostream OS(str);
-
- DebugLoc DL = MI->getDebugLoc();
- if (!DL.isUnknown()) {
-
- const MDNode *S = DL.getScope(MF->getFunction()->getContext());
-
- // This is taken from DwarfDebug.cpp, which is conveniently not a public
- // LLVM class.
- StringRef Fn;
- StringRef Dir;
- unsigned Src = 1;
- if (S) {
- DIDescriptor Scope(S);
- if (Scope.isCompileUnit()) {
- DICompileUnit CU(S);
- Fn = CU.getFilename();
- Dir = CU.getDirectory();
- } else if (Scope.isFile()) {
- DIFile F(S);
- Fn = F.getFilename();
- Dir = F.getDirectory();
- } else if (Scope.isSubprogram()) {
- DISubprogram SP(S);
- Fn = SP.getFilename();
- Dir = SP.getDirectory();
- } else if (Scope.isLexicalBlock()) {
- DILexicalBlock DB(S);
- Fn = DB.getFilename();
- Dir = DB.getDirectory();
- } else
- assert(0 && "Unexpected scope info");
-
- Src = GetOrCreateSourceID(Fn, Dir);
- }
- OutStreamer.EmitDwarfLocDirective(Src, DL.getLine(), DL.getCol(),
- 0, 0, 0, Fn);
-
- const MCDwarfLoc& MDL = OutContext.getCurrentDwarfLoc();
-
- OS << "\t.loc ";
- OS << utostr(MDL.getFileNum());
- OS << " ";
- OS << utostr(MDL.getLine());
- OS << " ";
- OS << utostr(MDL.getColumn());
- OS << "\n";
- }
-
-
- // Emit predicate
- printPredicateOperand(MI, OS);
-
- // Write instruction to str
- printInstruction(MI, OS);
- OS << ';';
- OS.flush();
-
- StringRef strref = StringRef(str);
- OutStreamer.EmitRawText(strref);
-}
-
-void PTXAsmPrinter::printOperand(const MachineInstr *MI, int opNum,
- raw_ostream &OS) {
- const MachineOperand &MO = MI->getOperand(opNum);
-
- switch (MO.getType()) {
- default:
- llvm_unreachable("<unknown operand type>");
- break;
- case MachineOperand::MO_GlobalAddress:
- OS << *Mang->getSymbol(MO.getGlobal());
- break;
- case MachineOperand::MO_Immediate:
- OS << (long) MO.getImm();
- break;
- case MachineOperand::MO_MachineBasicBlock:
- OS << *MO.getMBB()->getSymbol();
- break;
- case MachineOperand::MO_Register:
- OS << getRegisterName(MO.getReg());
- break;
- case MachineOperand::MO_FPImmediate:
- APInt constFP = MO.getFPImm()->getValueAPF().bitcastToAPInt();
- bool isFloat = MO.getFPImm()->getType()->getTypeID() == Type::FloatTyID;
- // Emit 0F for 32-bit floats and 0D for 64-bit doubles.
- if (isFloat) {
- OS << "0F";
- }
- else {
- OS << "0D";
- }
- // Emit the encoded floating-point value.
- if (constFP.getZExtValue() > 0) {
- OS << constFP.toString(16, false);
- }
- else {
- OS << "00000000";
- // If We have a double-precision zero, pad to 8-bytes.
- if (!isFloat) {
- OS << "00000000";
- }
- }
- break;
- }
-}
-
-void PTXAsmPrinter::printMemOperand(const MachineInstr *MI, int opNum,
- raw_ostream &OS, const char *Modifier) {
- printOperand(MI, opNum, OS);
- if (MI->getOperand(opNum+1).isImm() && MI->getOperand(opNum+1).getImm() == 0)
- return; // don't print "+0"
-
- OS << "+";
- printOperand(MI, opNum+1, OS);
+ //unsigned Index = 1;
+ // Print parameter passing params
+ //for (PTXMachineFunctionInfo::param_iterator
+ // i = MFI->paramBegin(), e = MFI->paramEnd(); i != e; ++i) {
+ // std::string def = "\t.param .b";
+ // def += utostr(*i);
+ // def += " __ret_";
+ // def += utostr(Index);
+ // Index++;
+ // def += ";";
+ // OutStreamer.EmitRawText(Twine(def));
+ //}
}
-void PTXAsmPrinter::printParamOperand(const MachineInstr *MI, int opNum,
- raw_ostream &OS, const char *Modifier) {
- OS << PARAM_PREFIX << (int) MI->getOperand(opNum).getImm() + 1;
+void PTXAsmPrinter::EmitFunctionBodyEnd() {
+ OutStreamer.EmitRawText(Twine("}"));
}
-void PTXAsmPrinter::printReturnOperand(const MachineInstr *MI, int opNum,
- raw_ostream &OS, const char *Modifier) {
- OS << RETURN_PREFIX << (int) MI->getOperand(opNum).getImm() + 1;
+void PTXAsmPrinter::EmitInstruction(const MachineInstr *MI) {
+ MCInst TmpInst;
+ LowerPTXMachineInstrToMCInst(MI, TmpInst, *this);
+ OutStreamer.EmitInstruction(TmpInst);
}
void PTXAsmPrinter::EmitVariableDeclaration(const GlobalVariable *gv) {
@@ -400,14 +310,14 @@ void PTXAsmPrinter::EmitVariableDeclaration(const GlobalVariable *gv) {
unsigned alignment = gv->getAlignment();
if (alignment != 0) {
decl += ".align ";
- decl += utostr(Log2_32(gv->getAlignment()));
+ decl += utostr(gv->getAlignment());
decl += " ";
}
if (PointerType::classof(gv->getType())) {
- const PointerType* pointerTy = dyn_cast<const PointerType>(gv->getType());
- const Type* elementTy = pointerTy->getElementType();
+ PointerType* pointerTy = dyn_cast<PointerType>(gv->getType());
+ Type* elementTy = pointerTy->getElementType();
decl += ".b8 ";
decl += gvsym->getName();
@@ -417,14 +327,14 @@ void PTXAsmPrinter::EmitVariableDeclaration(const GlobalVariable *gv) {
{
assert(elementTy->isArrayTy() && "Only pointers to arrays are supported");
- const ArrayType* arrayTy = dyn_cast<const ArrayType>(elementTy);
+ ArrayType* arrayTy = dyn_cast<ArrayType>(elementTy);
elementTy = arrayTy->getElementType();
unsigned numElements = arrayTy->getNumElements();
while (elementTy->isArrayTy()) {
- arrayTy = dyn_cast<const ArrayType>(elementTy);
+ arrayTy = dyn_cast<ArrayType>(elementTy);
elementTy = arrayTy->getElementType();
numElements *= arrayTy->getNumElements();
@@ -447,7 +357,7 @@ void PTXAsmPrinter::EmitVariableDeclaration(const GlobalVariable *gv) {
if (gv->hasInitializer())
{
- const Constant *C = gv->getInitializer();
+ const Constant *C = gv->getInitializer();
if (const ConstantArray *CA = dyn_cast<ConstantArray>(C))
{
decl += " = {";
@@ -484,7 +394,7 @@ void PTXAsmPrinter::EmitVariableDeclaration(const GlobalVariable *gv) {
OutStreamer.AddBlankLine();
}
-void PTXAsmPrinter::EmitFunctionDeclaration() {
+void PTXAsmPrinter::EmitFunctionEntryLabel() {
// The function label could have already been emitted if two symbols end up
// conflicting due to asm renaming. Detect this and emit an error.
if (!CurrentFnSym->isUndefined()) {
@@ -494,25 +404,39 @@ void PTXAsmPrinter::EmitFunctionDeclaration() {
}
const PTXMachineFunctionInfo *MFI = MF->getInfo<PTXMachineFunctionInfo>();
+ const PTXParamManager &PM = MFI->getParamManager();
const bool isKernel = MFI->isKernel();
const PTXSubtarget& ST = TM.getSubtarget<PTXSubtarget>();
+ const MachineRegisterInfo& MRI = MF->getRegInfo();
std::string decl = isKernel ? ".entry" : ".func";
- unsigned cnt = 0;
-
if (!isKernel) {
decl += " (";
- for (PTXMachineFunctionInfo::ret_iterator
- i = MFI->retRegBegin(), e = MFI->retRegEnd(), b = i;
- i != e; ++i) {
- if (i != b) {
- decl += ", ";
+ if (ST.useParamSpaceForDeviceArgs()) {
+ for (PTXParamManager::param_iterator i = PM.ret_begin(), e = PM.ret_end(),
+ b = i; i != e; ++i) {
+ if (i != b) {
+ decl += ", ";
+ }
+
+ decl += ".param .b";
+ decl += utostr(PM.getParamSize(*i));
+ decl += " ";
+ decl += PM.getParamName(*i);
+ }
+ } else {
+ for (PTXMachineFunctionInfo::reg_iterator
+ i = MFI->retreg_begin(), e = MFI->retreg_end(), b = i;
+ i != e; ++i) {
+ if (i != b) {
+ decl += ", ";
+ }
+ decl += ".reg .";
+ decl += getRegisterTypeName(*i, MRI);
+ decl += " ";
+ decl += MFI->getRegisterName(*i);
}
- decl += ".reg .";
- decl += getRegisterTypeName(*i);
- decl += " ";
- decl += getRegisterName(*i);
}
decl += ")";
}
@@ -523,26 +447,65 @@ void PTXAsmPrinter::EmitFunctionDeclaration() {
decl += " (";
- cnt = 0;
+ const Function *F = MF->getFunction();
// Print parameters
- for (PTXMachineFunctionInfo::reg_iterator
- i = MFI->argRegBegin(), e = MFI->argRegEnd(), b = i;
- i != e; ++i) {
- if (i != b) {
- decl += ", ";
- }
- if (isKernel || ST.useParamSpaceForDeviceArgs()) {
+ if (isKernel || ST.useParamSpaceForDeviceArgs()) {
+ /*for (PTXParamManager::param_iterator i = PM.arg_begin(), e = PM.arg_end(),
+ b = i; i != e; ++i) {
+ if (i != b) {
+ decl += ", ";
+ }
+
decl += ".param .b";
- decl += utostr(*i);
+ decl += utostr(PM.getParamSize(*i));
decl += " ";
- decl += PARAM_PREFIX;
- decl += utostr(++cnt);
- } else {
+ decl += PM.getParamName(*i);
+ }*/
+ int Counter = 1;
+ for (Function::const_arg_iterator i = F->arg_begin(), e = F->arg_end(),
+ b = i; i != e; ++i) {
+ if (i != b)
+ decl += ", ";
+ const Type *ArgType = (*i).getType();
+ decl += ".param .b";
+ if (ArgType->isPointerTy()) {
+ if (ST.is64Bit())
+ decl += "64";
+ else
+ decl += "32";
+ } else {
+ decl += utostr(ArgType->getPrimitiveSizeInBits());
+ }
+ if (ArgType->isPointerTy() && ST.emitPtrAttribute()) {
+ const PointerType *PtrType = dyn_cast<const PointerType>(ArgType);
+ decl += " .ptr";
+ switch (PtrType->getAddressSpace()) {
+ default:
+ llvm_unreachable("Unknown address space in argument");
+ case PTXStateSpace::Global:
+ decl += " .global";
+ break;
+ case PTXStateSpace::Shared:
+ decl += " .shared";
+ break;
+ }
+ }
+ decl += " __param_";
+ decl += utostr(Counter++);
+ }
+ } else {
+ for (PTXMachineFunctionInfo::reg_iterator
+ i = MFI->argreg_begin(), e = MFI->argreg_end(), b = i;
+ i != e; ++i) {
+ if (i != b) {
+ decl += ", ";
+ }
+
decl += ".reg .";
- decl += getRegisterTypeName(*i);
+ decl += getRegisterTypeName(*i, MRI);
decl += " ";
- decl += getRegisterName(*i);
+ decl += MFI->getRegisterName(*i);
}
}
decl += ")";
@@ -550,25 +513,6 @@ void PTXAsmPrinter::EmitFunctionDeclaration() {
OutStreamer.EmitRawText(Twine(decl));
}
-void PTXAsmPrinter::
-printPredicateOperand(const MachineInstr *MI, raw_ostream &O) {
- int i = MI->findFirstPredOperandIdx();
- if (i == -1)
- llvm_unreachable("missing predicate operand");
-
- unsigned reg = MI->getOperand(i).getReg();
- int predOp = MI->getOperand(i+1).getImm();
-
- DEBUG(dbgs() << "predicate: (" << reg << ", " << predOp << ")\n");
-
- if (reg != PTX::NoRegister) {
- O << '@';
- if (predOp == PTX::PRED_NEGATE)
- O << '!';
- O << getRegisterName(reg);
- }
-}
-
unsigned PTXAsmPrinter::GetOrCreateSourceID(StringRef FileName,
StringRef DirName) {
// If FE did not provide a file name, then assume stdin.
@@ -596,10 +540,58 @@ unsigned PTXAsmPrinter::GetOrCreateSourceID(StringRef FileName,
return SrcId;
}
-#include "PTXGenAsmWriter.inc"
+MCOperand PTXAsmPrinter::GetSymbolRef(const MachineOperand &MO,
+ const MCSymbol *Symbol) {
+ const MCExpr *Expr;
+ Expr = MCSymbolRefExpr::Create(Symbol, MCSymbolRefExpr::VK_None, OutContext);
+ return MCOperand::CreateExpr(Expr);
+}
+
+MCOperand PTXAsmPrinter::lowerOperand(const MachineOperand &MO) {
+ MCOperand MCOp;
+ const PTXMachineFunctionInfo *MFI = MF->getInfo<PTXMachineFunctionInfo>();
+ const MCExpr *Expr;
+ const char *RegSymbolName;
+ switch (MO.getType()) {
+ default:
+ llvm_unreachable("Unknown operand type");
+ case MachineOperand::MO_Register:
+ // We create register operands as symbols, since the PTXInstPrinter class
+ // has no way to map virtual registers back to a name without some ugly
+ // hacks.
+ // FIXME: Figure out a better way to handle virtual register naming.
+ RegSymbolName = MFI->getRegisterName(MO.getReg());
+ Expr = MCSymbolRefExpr::Create(RegSymbolName, MCSymbolRefExpr::VK_None,
+ OutContext);
+ MCOp = MCOperand::CreateExpr(Expr);
+ break;
+ case MachineOperand::MO_Immediate:
+ MCOp = MCOperand::CreateImm(MO.getImm());
+ break;
+ case MachineOperand::MO_MachineBasicBlock:
+ MCOp = MCOperand::CreateExpr(MCSymbolRefExpr::Create(
+ MO.getMBB()->getSymbol(), OutContext));
+ break;
+ case MachineOperand::MO_GlobalAddress:
+ MCOp = GetSymbolRef(MO, Mang->getSymbol(MO.getGlobal()));
+ break;
+ case MachineOperand::MO_ExternalSymbol:
+ MCOp = GetSymbolRef(MO, GetExternalSymbolSymbol(MO.getSymbolName()));
+ break;
+ case MachineOperand::MO_FPImmediate:
+ APFloat Val = MO.getFPImm()->getValueAPF();
+ bool ignored;
+ Val.convert(APFloat::IEEEdouble, APFloat::rmTowardZero, &ignored);
+ MCOp = MCOperand::CreateFPImm(Val.convertToDouble());
+ break;
+ }
+
+ return MCOp;
+}
// Force static initialization.
extern "C" void LLVMInitializePTXAsmPrinter() {
RegisterAsmPrinter<PTXAsmPrinter> X(ThePTX32Target);
RegisterAsmPrinter<PTXAsmPrinter> Y(ThePTX64Target);
}
+
diff --git a/contrib/llvm/lib/Target/PTX/PTXAsmPrinter.h b/contrib/llvm/lib/Target/PTX/PTXAsmPrinter.h
new file mode 100644
index 0000000..538c080
--- /dev/null
+++ b/contrib/llvm/lib/Target/PTX/PTXAsmPrinter.h
@@ -0,0 +1,57 @@
+//===-- PTXAsmPrinter.h - Print machine code to a PTX file ----------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// PTX Assembly printer class.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef PTXASMPRINTER_H
+#define PTXASMPRINTER_H
+
+#include "PTX.h"
+#include "PTXTargetMachine.h"
+#include "llvm/ADT/StringMap.h"
+#include "llvm/CodeGen/AsmPrinter.h"
+#include "llvm/Support/Compiler.h"
+
+namespace llvm {
+
+class MCOperand;
+
+class LLVM_LIBRARY_VISIBILITY PTXAsmPrinter : public AsmPrinter {
+public:
+ explicit PTXAsmPrinter(TargetMachine &TM, MCStreamer &Streamer)
+ : AsmPrinter(TM, Streamer) {}
+
+ const char *getPassName() const { return "PTX Assembly Printer"; }
+
+ bool doFinalization(Module &M);
+
+ virtual void EmitStartOfAsmFile(Module &M);
+ virtual void EmitFunctionBodyStart();
+ virtual void EmitFunctionBodyEnd();
+ virtual void EmitFunctionEntryLabel();
+ virtual void EmitInstruction(const MachineInstr *MI);
+
+ unsigned GetOrCreateSourceID(StringRef FileName,
+ StringRef DirName);
+
+ MCOperand GetSymbolRef(const MachineOperand &MO, const MCSymbol *Symbol);
+ MCOperand lowerOperand(const MachineOperand &MO);
+
+private:
+ void EmitVariableDeclaration(const GlobalVariable *gv);
+ void EmitFunctionDeclaration();
+
+ StringMap<unsigned> SourceIdMap;
+}; // class PTXAsmPrinter
+} // namespace llvm
+
+#endif
+
diff --git a/contrib/llvm/lib/Target/PTX/PTXCallingConv.td b/contrib/llvm/lib/Target/PTX/PTXCallingConv.td
deleted file mode 100644
index 3e3ff48..0000000
--- a/contrib/llvm/lib/Target/PTX/PTXCallingConv.td
+++ /dev/null
@@ -1,29 +0,0 @@
-
-//===--- PTXCallingConv.td - Calling Conventions -----------*- tablegen -*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This describes the calling conventions for the PTX architecture.
-//
-//===----------------------------------------------------------------------===//
-
-// PTX Formal Parameter Calling Convention
-def CC_PTX : CallingConv<[
- CCIfType<[i1], CCAssignToReg<[P12, P13, P14, P15, P16, P17, P18, P19, P20, P21, P22, P23, P24, P25, P26, P27, P28, P29, P30, P31, P32, P33, P34, P35, P36, P37, P38, P39, P40, P41, P42, P43, P44, P45, P46, P47, P48, P49, P50, P51, P52, P53, P54, P55, P56, P57, P58, P59, P60, P61, P62, P63, P64, P65, P66, P67, P68, P69, P70, P71, P72, P73, P74, P75, P76, P77, P78, P79, P80, P81, P82, P83, P84, P85, P86, P87, P88, P89, P90, P91, P92, P93, P94, P95, P96, P97, P98, P99, P100, P101, P102, P103, P104, P105, P106, P107, P108, P109, P110, P111, P112, P113, P114, P115, P116, P117, P118, P119, P120, P121, P122, P123, P124, P125, P126, P127]>>,
- CCIfType<[i16], CCAssignToReg<[RH12, RH13, RH14, RH15, RH16, RH17, RH18, RH19, RH20, RH21, RH22, RH23, RH24, RH25, RH26, RH27, RH28, RH29, RH30, RH31, RH32, RH33, RH34, RH35, RH36, RH37, RH38, RH39, RH40, RH41, RH42, RH43, RH44, RH45, RH46, RH47, RH48, RH49, RH50, RH51, RH52, RH53, RH54, RH55, RH56, RH57, RH58, RH59, RH60, RH61, RH62, RH63, RH64, RH65, RH66, RH67, RH68, RH69, RH70, RH71, RH72, RH73, RH74, RH75, RH76, RH77, RH78, RH79, RH80, RH81, RH82, RH83, RH84, RH85, RH86, RH87, RH88, RH89, RH90, RH91, RH92, RH93, RH94, RH95, RH96, RH97, RH98, RH99, RH100, RH101, RH102, RH103, RH104, RH105, RH106, RH107, RH108, RH109, RH110, RH111, RH112, RH113, RH114, RH115, RH116, RH117, RH118, RH119, RH120, RH121, RH122, RH123, RH124, RH125, RH126, RH127]>>,
- CCIfType<[i32,f32], CCAssignToReg<[R12, R13, R14, R15, R16, R17, R18, R19, R20, R21, R22, R23, R24, R25, R26, R27, R28, R29, R30, R31, R32, R33, R34, R35, R36, R37, R38, R39, R40, R41, R42, R43, R44, R45, R46, R47, R48, R49, R50, R51, R52, R53, R54, R55, R56, R57, R58, R59, R60, R61, R62, R63, R64, R65, R66, R67, R68, R69, R70, R71, R72, R73, R74, R75, R76, R77, R78, R79, R80, R81, R82, R83, R84, R85, R86, R87, R88, R89, R90, R91, R92, R93, R94, R95, R96, R97, R98, R99, R100, R101, R102, R103, R104, R105, R106, R107, R108, R109, R110, R111, R112, R113, R114, R115, R116, R117, R118, R119, R120, R121, R122, R123, R124, R125, R126, R127]>>,
- CCIfType<[i64,f64], CCAssignToReg<[RD12, RD13, RD14, RD15, RD16, RD17, RD18, RD19, RD20, RD21, RD22, RD23, RD24, RD25, RD26, RD27, RD28, RD29, RD30, RD31, RD32, RD33, RD34, RD35, RD36, RD37, RD38, RD39, RD40, RD41, RD42, RD43, RD44, RD45, RD46, RD47, RD48, RD49, RD50, RD51, RD52, RD53, RD54, RD55, RD56, RD57, RD58, RD59, RD60, RD61, RD62, RD63, RD64, RD65, RD66, RD67, RD68, RD69, RD70, RD71, RD72, RD73, RD74, RD75, RD76, RD77, RD78, RD79, RD80, RD81, RD82, RD83, RD84, RD85, RD86, RD87, RD88, RD89, RD90, RD91, RD92, RD93, RD94, RD95, RD96, RD97, RD98, RD99, RD100, RD101, RD102, RD103, RD104, RD105, RD106, RD107, RD108, RD109, RD110, RD111, RD112, RD113, RD114, RD115, RD116, RD117, RD118, RD119, RD120, RD121, RD122, RD123, RD124, RD125, RD126, RD127]>>
-]>;
-
-// PTX Return Value Calling Convention
-def RetCC_PTX : CallingConv<[
- CCIfType<[i1], CCAssignToReg<[P0, P1, P2, P3, P4, P5, P6, P7, P8, P9, P10, P11]>>,
- CCIfType<[i16], CCAssignToReg<[RH0, RH1, RH2, RH3, RH4, RH5, RH6, RH7, RH8, RH9, RH10, RH11]>>,
- CCIfType<[i32,f32], CCAssignToReg<[R0, R1, R2, R3, R4, R5, R6, R7, R8, R9, R10, R11]>>,
- CCIfType<[i64,f64], CCAssignToReg<[RD0, RD1, RD2, RD3, RD4, RD5, RD6, RD7, RD8, RD9, RD10, RD11]>>
-]>;
diff --git a/contrib/llvm/lib/Target/PTX/PTXFPRoundingModePass.cpp b/contrib/llvm/lib/Target/PTX/PTXFPRoundingModePass.cpp
new file mode 100644
index 0000000..0b653e0
--- /dev/null
+++ b/contrib/llvm/lib/Target/PTX/PTXFPRoundingModePass.cpp
@@ -0,0 +1,179 @@
+//===-- PTXFPRoundingModePass.cpp - Assign rounding modes pass ------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines a machine function pass that sets appropriate FP rounding
+// modes for all relevant instructions.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "ptx-fp-rounding-mode"
+
+#include "PTX.h"
+#include "PTXTargetMachine.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+
+// NOTE: PTXFPRoundingModePass should be executed just before emission.
+
+namespace llvm {
+ /// PTXFPRoundingModePass - Pass to assign appropriate FP rounding modes to
+ /// all FP instructions. Essentially, this pass just looks for all FP
+ /// instructions that have a rounding mode set to RndDefault, and sets an
+ /// appropriate rounding mode based on the target device.
+ ///
+ class PTXFPRoundingModePass : public MachineFunctionPass {
+ private:
+ static char ID;
+
+ typedef std::pair<unsigned, unsigned> RndModeDesc;
+
+ PTXTargetMachine& TargetMachine;
+ DenseMap<unsigned, RndModeDesc> Instrs;
+
+ public:
+ PTXFPRoundingModePass(PTXTargetMachine &TM, CodeGenOpt::Level OptLevel)
+ : MachineFunctionPass(ID),
+ TargetMachine(TM) {
+ initializeMap();
+ }
+
+ virtual bool runOnMachineFunction(MachineFunction &MF);
+
+ virtual const char *getPassName() const {
+ return "PTX FP Rounding Mode Pass";
+ }
+
+ private:
+
+ void initializeMap();
+ void processInstruction(MachineInstr &MI);
+ }; // class PTXFPRoundingModePass
+} // namespace llvm
+
+using namespace llvm;
+
+char PTXFPRoundingModePass::ID = 0;
+
+bool PTXFPRoundingModePass::runOnMachineFunction(MachineFunction &MF) {
+ // Look at each basic block
+ for (MachineFunction::iterator bbi = MF.begin(), bbe = MF.end(); bbi != bbe;
+ ++bbi) {
+ MachineBasicBlock &MBB = *bbi;
+ // Look at each instruction
+ for (MachineBasicBlock::iterator ii = MBB.begin(), ie = MBB.end();
+ ii != ie; ++ii) {
+ MachineInstr &MI = *ii;
+ processInstruction(MI);
+ }
+ }
+ return false;
+}
+
+void PTXFPRoundingModePass::initializeMap() {
+ using namespace PTXRoundingMode;
+ const PTXSubtarget& ST = TargetMachine.getSubtarget<PTXSubtarget>();
+
+ // Build a map of default rounding mode for all instructions that need a
+ // rounding mode.
+ Instrs[PTX::FADDrr32] = std::make_pair(1U, (unsigned)RndNearestEven);
+ Instrs[PTX::FADDri32] = std::make_pair(1U, (unsigned)RndNearestEven);
+ Instrs[PTX::FADDrr64] = std::make_pair(1U, (unsigned)RndNearestEven);
+ Instrs[PTX::FADDri64] = std::make_pair(1U, (unsigned)RndNearestEven);
+ Instrs[PTX::FSUBrr32] = std::make_pair(1U, (unsigned)RndNearestEven);
+ Instrs[PTX::FSUBri32] = std::make_pair(1U, (unsigned)RndNearestEven);
+ Instrs[PTX::FSUBrr64] = std::make_pair(1U, (unsigned)RndNearestEven);
+ Instrs[PTX::FSUBri64] = std::make_pair(1U, (unsigned)RndNearestEven);
+ Instrs[PTX::FMULrr32] = std::make_pair(1U, (unsigned)RndNearestEven);
+ Instrs[PTX::FMULri32] = std::make_pair(1U, (unsigned)RndNearestEven);
+ Instrs[PTX::FMULrr64] = std::make_pair(1U, (unsigned)RndNearestEven);
+ Instrs[PTX::FMULri64] = std::make_pair(1U, (unsigned)RndNearestEven);
+
+ Instrs[PTX::FNEGrr32] = std::make_pair(1U, (unsigned)RndNone);
+ Instrs[PTX::FNEGri32] = std::make_pair(1U, (unsigned)RndNone);
+ Instrs[PTX::FNEGrr64] = std::make_pair(1U, (unsigned)RndNone);
+ Instrs[PTX::FNEGri64] = std::make_pair(1U, (unsigned)RndNone);
+
+ unsigned FDivRndMode = ST.fdivNeedsRoundingMode() ? RndNearestEven : RndNone;
+ Instrs[PTX::FDIVrr32] = std::make_pair(1U, FDivRndMode);
+ Instrs[PTX::FDIVri32] = std::make_pair(1U, FDivRndMode);
+ Instrs[PTX::FDIVrr64] = std::make_pair(1U, FDivRndMode);
+ Instrs[PTX::FDIVri64] = std::make_pair(1U, FDivRndMode);
+
+ unsigned FMADRndMode = ST.fmadNeedsRoundingMode() ? RndNearestEven : RndNone;
+ Instrs[PTX::FMADrrr32] = std::make_pair(1U, FMADRndMode);
+ Instrs[PTX::FMADrri32] = std::make_pair(1U, FMADRndMode);
+ Instrs[PTX::FMADrii32] = std::make_pair(1U, FMADRndMode);
+ Instrs[PTX::FMADrrr64] = std::make_pair(1U, FMADRndMode);
+ Instrs[PTX::FMADrri64] = std::make_pair(1U, FMADRndMode);
+ Instrs[PTX::FMADrii64] = std::make_pair(1U, FMADRndMode);
+
+ Instrs[PTX::FSQRTrr32] = std::make_pair(1U, (unsigned)RndNearestEven);
+ Instrs[PTX::FSQRTri32] = std::make_pair(1U, (unsigned)RndNearestEven);
+ Instrs[PTX::FSQRTrr64] = std::make_pair(1U, (unsigned)RndNearestEven);
+ Instrs[PTX::FSQRTri64] = std::make_pair(1U, (unsigned)RndNearestEven);
+
+ Instrs[PTX::FSINrr32] = std::make_pair(1U, (unsigned)RndApprox);
+ Instrs[PTX::FSINri32] = std::make_pair(1U, (unsigned)RndApprox);
+ Instrs[PTX::FSINrr64] = std::make_pair(1U, (unsigned)RndApprox);
+ Instrs[PTX::FSINri64] = std::make_pair(1U, (unsigned)RndApprox);
+ Instrs[PTX::FCOSrr32] = std::make_pair(1U, (unsigned)RndApprox);
+ Instrs[PTX::FCOSri32] = std::make_pair(1U, (unsigned)RndApprox);
+ Instrs[PTX::FCOSrr64] = std::make_pair(1U, (unsigned)RndApprox);
+ Instrs[PTX::FCOSri64] = std::make_pair(1U, (unsigned)RndApprox);
+
+ Instrs[PTX::CVTu16f32] = std::make_pair(1U, (unsigned)RndTowardsZeroInt);
+ Instrs[PTX::CVTs16f32] = std::make_pair(1U, (unsigned)RndTowardsZeroInt);
+ Instrs[PTX::CVTu16f64] = std::make_pair(1U, (unsigned)RndTowardsZeroInt);
+ Instrs[PTX::CVTs16f64] = std::make_pair(1U, (unsigned)RndTowardsZeroInt);
+ Instrs[PTX::CVTu32f32] = std::make_pair(1U, (unsigned)RndTowardsZeroInt);
+ Instrs[PTX::CVTs32f32] = std::make_pair(1U, (unsigned)RndTowardsZeroInt);
+ Instrs[PTX::CVTu32f64] = std::make_pair(1U, (unsigned)RndTowardsZeroInt);
+ Instrs[PTX::CVTs32f64] = std::make_pair(1U, (unsigned)RndTowardsZeroInt);
+ Instrs[PTX::CVTu64f32] = std::make_pair(1U, (unsigned)RndTowardsZeroInt);
+ Instrs[PTX::CVTs64f32] = std::make_pair(1U, (unsigned)RndTowardsZeroInt);
+ Instrs[PTX::CVTu64f64] = std::make_pair(1U, (unsigned)RndTowardsZeroInt);
+ Instrs[PTX::CVTs64f64] = std::make_pair(1U, (unsigned)RndTowardsZeroInt);
+
+ Instrs[PTX::CVTf32u16] = std::make_pair(1U, (unsigned)RndNearestEven);
+ Instrs[PTX::CVTf32s16] = std::make_pair(1U, (unsigned)RndNearestEven);
+ Instrs[PTX::CVTf32u32] = std::make_pair(1U, (unsigned)RndNearestEven);
+ Instrs[PTX::CVTf32s32] = std::make_pair(1U, (unsigned)RndNearestEven);
+ Instrs[PTX::CVTf32u64] = std::make_pair(1U, (unsigned)RndNearestEven);
+ Instrs[PTX::CVTf32s64] = std::make_pair(1U, (unsigned)RndNearestEven);
+ Instrs[PTX::CVTf32f64] = std::make_pair(1U, (unsigned)RndNearestEven);
+ Instrs[PTX::CVTf64u16] = std::make_pair(1U, (unsigned)RndNearestEven);
+ Instrs[PTX::CVTf64s16] = std::make_pair(1U, (unsigned)RndNearestEven);
+ Instrs[PTX::CVTf64u32] = std::make_pair(1U, (unsigned)RndNearestEven);
+ Instrs[PTX::CVTf64s32] = std::make_pair(1U, (unsigned)RndNearestEven);
+ Instrs[PTX::CVTf64u64] = std::make_pair(1U, (unsigned)RndNearestEven);
+ Instrs[PTX::CVTf64s64] = std::make_pair(1U, (unsigned)RndNearestEven);
+}
+
+void PTXFPRoundingModePass::processInstruction(MachineInstr &MI) {
+ // Is this an instruction that needs a rounding mode?
+ if (Instrs.count(MI.getOpcode())) {
+ const RndModeDesc &Desc = Instrs[MI.getOpcode()];
+ // Get the rounding mode operand
+ MachineOperand &Op = MI.getOperand(Desc.first);
+ // Update the rounding mode if needed
+ if (Op.getImm() == PTXRoundingMode::RndDefault) {
+ Op.setImm(Desc.second);
+ }
+ }
+}
+
+FunctionPass *llvm::createPTXFPRoundingModePass(PTXTargetMachine &TM,
+ CodeGenOpt::Level OptLevel) {
+ return new PTXFPRoundingModePass(TM, OptLevel);
+}
+
diff --git a/contrib/llvm/lib/Target/PTX/PTXISelDAGToDAG.cpp b/contrib/llvm/lib/Target/PTX/PTXISelDAGToDAG.cpp
index 9adfa62..5c7ee29 100644
--- a/contrib/llvm/lib/Target/PTX/PTXISelDAGToDAG.cpp
+++ b/contrib/llvm/lib/Target/PTX/PTXISelDAGToDAG.cpp
@@ -12,7 +12,9 @@
//===----------------------------------------------------------------------===//
#include "PTX.h"
+#include "PTXMachineFunctionInfo.h"
#include "PTXTargetMachine.h"
+#include "llvm/ADT/StringExtras.h"
#include "llvm/CodeGen/SelectionDAGISel.h"
#include "llvm/DerivedTypes.h"
#include "llvm/Support/Debug.h"
@@ -37,6 +39,7 @@ class PTXDAGToDAGISel : public SelectionDAGISel {
bool SelectADDRrr(SDValue &Addr, SDValue &R1, SDValue &R2);
bool SelectADDRri(SDValue &Addr, SDValue &Base, SDValue &Offset);
bool SelectADDRii(SDValue &Addr, SDValue &Base, SDValue &Offset);
+ bool SelectADDRlocal(SDValue &Addr, SDValue &Base, SDValue &Offset);
// Include the pieces auto'gened from the target description
#include "PTXGenDAGISel.inc"
@@ -46,6 +49,10 @@ class PTXDAGToDAGISel : public SelectionDAGISel {
// pattern (PTXbrcond bb:$d, ...) in PTXInstrInfo.td
SDNode *SelectBRCOND(SDNode *Node);
+ SDNode *SelectREADPARAM(SDNode *Node);
+ SDNode *SelectWRITEPARAM(SDNode *Node);
+ SDNode *SelectFrameIndex(SDNode *Node);
+
bool isImm(const SDValue &operand);
bool SelectImm(const SDValue &operand, SDValue &imm);
@@ -68,6 +75,12 @@ SDNode *PTXDAGToDAGISel::Select(SDNode *Node) {
switch (Node->getOpcode()) {
case ISD::BRCOND:
return SelectBRCOND(Node);
+ case PTXISD::READ_PARAM:
+ return SelectREADPARAM(Node);
+ case PTXISD::WRITE_PARAM:
+ return SelectWRITEPARAM(Node);
+ case ISD::FrameIndex:
+ return SelectFrameIndex(Node);
default:
return SelectCode(Node);
}
@@ -79,7 +92,7 @@ SDNode *PTXDAGToDAGISel::SelectBRCOND(SDNode *Node) {
SDValue Chain = Node->getOperand(0);
SDValue Pred = Node->getOperand(1);
SDValue Target = Node->getOperand(2); // branch target
- SDValue PredOp = CurDAG->getTargetConstant(PTX::PRED_NORMAL, MVT::i32);
+ SDValue PredOp = CurDAG->getTargetConstant(PTXPredicate::Normal, MVT::i32);
DebugLoc dl = Node->getDebugLoc();
assert(Target.getOpcode() == ISD::BasicBlock);
@@ -90,6 +103,97 @@ SDNode *PTXDAGToDAGISel::SelectBRCOND(SDNode *Node) {
return CurDAG->getMachineNode(PTX::BRAdp, dl, MVT::Other, Ops, 4);
}
+SDNode *PTXDAGToDAGISel::SelectREADPARAM(SDNode *Node) {
+ SDValue Chain = Node->getOperand(0);
+ SDValue Index = Node->getOperand(1);
+
+ int OpCode;
+
+ // Get the type of parameter we are reading
+ EVT VT = Node->getValueType(0);
+ assert(VT.isSimple() && "READ_PARAM only implemented for MVT types");
+
+ MVT Type = VT.getSimpleVT();
+
+ if (Type == MVT::i1)
+ OpCode = PTX::READPARAMPRED;
+ else if (Type == MVT::i16)
+ OpCode = PTX::READPARAMI16;
+ else if (Type == MVT::i32)
+ OpCode = PTX::READPARAMI32;
+ else if (Type == MVT::i64)
+ OpCode = PTX::READPARAMI64;
+ else if (Type == MVT::f32)
+ OpCode = PTX::READPARAMF32;
+ else {
+ assert(Type == MVT::f64 && "Unexpected type!");
+ OpCode = PTX::READPARAMF64;
+ }
+
+ SDValue Pred = CurDAG->getRegister(PTX::NoRegister, MVT::i1);
+ SDValue PredOp = CurDAG->getTargetConstant(PTXPredicate::None, MVT::i32);
+ DebugLoc dl = Node->getDebugLoc();
+
+ SDValue Ops[] = { Index, Pred, PredOp, Chain };
+ return CurDAG->getMachineNode(OpCode, dl, VT, Ops, 4);
+}
+
+SDNode *PTXDAGToDAGISel::SelectWRITEPARAM(SDNode *Node) {
+
+ SDValue Chain = Node->getOperand(0);
+ SDValue Value = Node->getOperand(1);
+
+ int OpCode;
+
+ //Node->dumpr(CurDAG);
+
+ // Get the type of parameter we are writing
+ EVT VT = Value->getValueType(0);
+ assert(VT.isSimple() && "WRITE_PARAM only implemented for MVT types");
+
+ MVT Type = VT.getSimpleVT();
+
+ if (Type == MVT::i1)
+ OpCode = PTX::WRITEPARAMPRED;
+ else if (Type == MVT::i16)
+ OpCode = PTX::WRITEPARAMI16;
+ else if (Type == MVT::i32)
+ OpCode = PTX::WRITEPARAMI32;
+ else if (Type == MVT::i64)
+ OpCode = PTX::WRITEPARAMI64;
+ else if (Type == MVT::f32)
+ OpCode = PTX::WRITEPARAMF32;
+ else if (Type == MVT::f64)
+ OpCode = PTX::WRITEPARAMF64;
+ else
+ llvm_unreachable("Invalid type in SelectWRITEPARAM");
+
+ SDValue Pred = CurDAG->getRegister(PTX::NoRegister, MVT::i1);
+ SDValue PredOp = CurDAG->getTargetConstant(PTXPredicate::None, MVT::i32);
+ DebugLoc dl = Node->getDebugLoc();
+
+ SDValue Ops[] = { Value, Pred, PredOp, Chain };
+ SDNode* Ret = CurDAG->getMachineNode(OpCode, dl, MVT::Other, Ops, 4);
+
+ //dbgs() << "SelectWRITEPARAM produced:\n\t";
+ //Ret->dumpr(CurDAG);
+
+ return Ret;
+}
+
+SDNode *PTXDAGToDAGISel::SelectFrameIndex(SDNode *Node) {
+ int FI = cast<FrameIndexSDNode>(Node)->getIndex();
+ //dbgs() << "Selecting FrameIndex at index " << FI << "\n";
+ //SDValue TFI = CurDAG->getTargetFrameIndex(FI, Node->getValueType(0));
+
+ PTXMachineFunctionInfo *MFI = MF->getInfo<PTXMachineFunctionInfo>();
+
+ SDValue FrameSymbol = CurDAG->getTargetExternalSymbol(MFI->getFrameSymbol(FI),
+ Node->getValueType(0));
+
+ return FrameSymbol.getNode();
+}
+
// Match memory operand of the form [reg+reg]
bool PTXDAGToDAGISel::SelectADDRrr(SDValue &Addr, SDValue &R1, SDValue &R2) {
if (Addr.getOpcode() != ISD::ADD || Addr.getNumOperands() < 2 ||
@@ -107,14 +211,54 @@ bool PTXDAGToDAGISel::SelectADDRrr(SDValue &Addr, SDValue &R1, SDValue &R2) {
// Match memory operand of the form [reg], [imm+reg], and [reg+imm]
bool PTXDAGToDAGISel::SelectADDRri(SDValue &Addr, SDValue &Base,
SDValue &Offset) {
- if (Addr.getOpcode() != ISD::ADD) {
+ // FrameIndex addresses are handled separately
+ //errs() << "SelectADDRri: ";
+ //Addr.getNode()->dumpr();
+ if (isa<FrameIndexSDNode>(Addr)) {
+ //errs() << "Failure\n";
+ return false;
+ }
+
+ if (CurDAG->isBaseWithConstantOffset(Addr)) {
+ Base = Addr.getOperand(0);
+ if (isa<FrameIndexSDNode>(Base)) {
+ //errs() << "Failure\n";
+ return false;
+ }
+ ConstantSDNode *CN = dyn_cast<ConstantSDNode>(Addr.getOperand(1));
+ Offset = CurDAG->getTargetConstant(CN->getZExtValue(), MVT::i32);
+ //errs() << "Success\n";
+ return true;
+ }
+
+ /*if (Addr.getNumOperands() == 1) {
+ Base = Addr;
+ Offset = CurDAG->getTargetConstant(0, Addr.getValueType().getSimpleVT());
+ errs() << "Success\n";
+ return true;
+ }*/
+
+ //errs() << "SelectADDRri fails on: ";
+ //Addr.getNode()->dumpr();
+
+ if (isImm(Addr)) {
+ //errs() << "Failure\n";
+ return false;
+ }
+
+ Base = Addr;
+ Offset = CurDAG->getTargetConstant(0, Addr.getValueType().getSimpleVT());
+
+ //errs() << "Success\n";
+ return true;
+
+ /*if (Addr.getOpcode() != ISD::ADD) {
// let SelectADDRii handle the [imm] case
if (isImm(Addr))
return false;
// it is [reg]
assert(Addr.getValueType().isSimple() && "Type must be simple");
-
Base = Addr;
Offset = CurDAG->getTargetConstant(0, Addr.getValueType().getSimpleVT());
@@ -136,7 +280,7 @@ bool PTXDAGToDAGISel::SelectADDRri(SDValue &Addr, SDValue &Base,
}
// neither [reg+imm] nor [imm+reg]
- return false;
+ return false;*/
}
// Match memory operand of the form [imm+imm] and [imm]
@@ -160,6 +304,36 @@ bool PTXDAGToDAGISel::SelectADDRii(SDValue &Addr, SDValue &Base,
return false;
}
+// Match memory operand of the form [reg], [imm+reg], and [reg+imm]
+bool PTXDAGToDAGISel::SelectADDRlocal(SDValue &Addr, SDValue &Base,
+ SDValue &Offset) {
+ //errs() << "SelectADDRlocal: ";
+ //Addr.getNode()->dumpr();
+ if (isa<FrameIndexSDNode>(Addr)) {
+ Base = Addr;
+ Offset = CurDAG->getTargetConstant(0, Addr.getValueType().getSimpleVT());
+ //errs() << "Success\n";
+ return true;
+ }
+
+ if (CurDAG->isBaseWithConstantOffset(Addr)) {
+ Base = Addr.getOperand(0);
+ if (!isa<FrameIndexSDNode>(Base)) {
+ //errs() << "Failure\n";
+ return false;
+ }
+ ConstantSDNode *CN = dyn_cast<ConstantSDNode>(Addr.getOperand(1));
+ Offset = CurDAG->getTargetConstant(CN->getZExtValue(), MVT::i32);
+ //errs() << "Offset: ";
+ //Offset.getNode()->dumpr();
+ //errs() << "Success\n";
+ return true;
+ }
+
+ //errs() << "Failure\n";
+ return false;
+}
+
bool PTXDAGToDAGISel::isImm(const SDValue &operand) {
return ConstantSDNode::classof(operand.getNode());
}
diff --git a/contrib/llvm/lib/Target/PTX/PTXISelLowering.cpp b/contrib/llvm/lib/Target/PTX/PTXISelLowering.cpp
index 6fcf710..3307d91 100644
--- a/contrib/llvm/lib/Target/PTX/PTXISelLowering.cpp
+++ b/contrib/llvm/lib/Target/PTX/PTXISelLowering.cpp
@@ -16,23 +16,19 @@
#include "PTXMachineFunctionInfo.h"
#include "PTXRegisterInfo.h"
#include "PTXSubtarget.h"
+#include "llvm/Function.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/CodeGen/CallingConvLower.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
+#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
using namespace llvm;
//===----------------------------------------------------------------------===//
-// Calling Convention Implementation
-//===----------------------------------------------------------------------===//
-
-#include "PTXGenCallingConv.inc"
-
-//===----------------------------------------------------------------------===//
// TargetLowering Implementation
//===----------------------------------------------------------------------===//
@@ -47,57 +43,58 @@ PTXTargetLowering::PTXTargetLowering(TargetMachine &TM)
addRegisterClass(MVT::f64, PTX::RegF64RegisterClass);
setBooleanContents(ZeroOrOneBooleanContent);
+ setBooleanVectorContents(ZeroOrOneBooleanContent); // FIXME: Is this correct?
setMinFunctionAlignment(2);
-
+
////////////////////////////////////
/////////// Expansion //////////////
////////////////////////////////////
-
+
// (any/zero/sign) extload => load + (any/zero/sign) extend
-
+
setLoadExtAction(ISD::EXTLOAD, MVT::i16, Expand);
setLoadExtAction(ISD::ZEXTLOAD, MVT::i16, Expand);
setLoadExtAction(ISD::SEXTLOAD, MVT::i16, Expand);
-
+
// f32 extload => load + fextend
-
- setLoadExtAction(ISD::EXTLOAD, MVT::f32, Expand);
-
+
+ setLoadExtAction(ISD::EXTLOAD, MVT::f32, Expand);
+
// f64 truncstore => trunc + store
-
- setTruncStoreAction(MVT::f64, MVT::f32, Expand);
-
+
+ setTruncStoreAction(MVT::f64, MVT::f32, Expand);
+
// sign_extend_inreg => sign_extend
-
+
setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);
-
+
// br_cc => brcond
-
+
setOperationAction(ISD::BR_CC, MVT::Other, Expand);
// select_cc => setcc
-
+
setOperationAction(ISD::SELECT_CC, MVT::Other, Expand);
setOperationAction(ISD::SELECT_CC, MVT::f32, Expand);
setOperationAction(ISD::SELECT_CC, MVT::f64, Expand);
-
+
////////////////////////////////////
//////////// Legal /////////////////
////////////////////////////////////
-
+
setOperationAction(ISD::ConstantFP, MVT::f32, Legal);
setOperationAction(ISD::ConstantFP, MVT::f64, Legal);
-
+
////////////////////////////////////
//////////// Custom ////////////////
////////////////////////////////////
-
+
// customise setcc to use bitwise logic if possible
-
+
setOperationAction(ISD::SETCC, MVT::i1, Custom);
// customize translation of memory addresses
-
+
setOperationAction(ISD::GlobalAddress, MVT::i32, Custom);
setOperationAction(ISD::GlobalAddress, MVT::i64, Custom);
@@ -105,7 +102,7 @@ PTXTargetLowering::PTXTargetLowering(TargetMachine &TM)
computeRegisterProperties();
}
-MVT::SimpleValueType PTXTargetLowering::getSetCCResultType(EVT VT) const {
+EVT PTXTargetLowering::getSetCCResultType(EVT VT) const {
return MVT::i1;
}
@@ -130,10 +127,16 @@ const char *PTXTargetLowering::getTargetNodeName(unsigned Opcode) const {
return "PTXISD::LOAD_PARAM";
case PTXISD::STORE_PARAM:
return "PTXISD::STORE_PARAM";
+ case PTXISD::READ_PARAM:
+ return "PTXISD::READ_PARAM";
+ case PTXISD::WRITE_PARAM:
+ return "PTXISD::WRITE_PARAM";
case PTXISD::EXIT:
return "PTXISD::EXIT";
case PTXISD::RET:
return "PTXISD::RET";
+ case PTXISD::CALL:
+ return "PTXISD::CALL";
}
}
@@ -149,7 +152,7 @@ SDValue PTXTargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const {
DebugLoc dl = Op.getDebugLoc();
ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(2))->get();
- // Look for X == 0, X == 1, X != 0, or X != 1
+ // Look for X == 0, X == 1, X != 0, or X != 1
// We can simplify these to bitwise logic
if (Op1.getOpcode() == ISD::Constant &&
@@ -197,6 +200,7 @@ SDValue PTXTargetLowering::
MachineFunction &MF = DAG.getMachineFunction();
const PTXSubtarget& ST = getTargetMachine().getSubtarget<PTXSubtarget>();
PTXMachineFunctionInfo *MFI = MF.getInfo<PTXMachineFunctionInfo>();
+ PTXParamManager &PM = MFI->getParamManager();
switch (CallConv) {
default:
@@ -216,68 +220,34 @@ SDValue PTXTargetLowering::
if (MFI->isKernel() || ST.useParamSpaceForDeviceArgs()) {
// We just need to emit the proper LOAD_PARAM ISDs
for (unsigned i = 0, e = Ins.size(); i != e; ++i) {
-
assert((!MFI->isKernel() || Ins[i].VT != MVT::i1) &&
"Kernels cannot take pred operands");
+ unsigned ParamSize = Ins[i].VT.getStoreSizeInBits();
+ unsigned Param = PM.addArgumentParam(ParamSize);
+ const std::string &ParamName = PM.getParamName(Param);
+ SDValue ParamValue = DAG.getTargetExternalSymbol(ParamName.c_str(),
+ MVT::Other);
SDValue ArgValue = DAG.getNode(PTXISD::LOAD_PARAM, dl, Ins[i].VT, Chain,
- DAG.getTargetConstant(i, MVT::i32));
+ ParamValue);
InVals.push_back(ArgValue);
-
- // Instead of storing a physical register in our argument list, we just
- // store the total size of the parameter, in bits. The ASM printer
- // knows how to process this.
- MFI->addArgReg(Ins[i].VT.getStoreSizeInBits());
}
}
else {
- // For device functions, we use the PTX calling convention to do register
- // assignments then create CopyFromReg ISDs for the allocated registers
-
- SmallVector<CCValAssign, 16> ArgLocs;
- CCState CCInfo(CallConv, isVarArg, MF, getTargetMachine(), ArgLocs,
- *DAG.getContext());
-
- CCInfo.AnalyzeFormalArguments(Ins, CC_PTX);
-
- for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
-
- CCValAssign& VA = ArgLocs[i];
- EVT RegVT = VA.getLocVT();
- TargetRegisterClass* TRC = 0;
-
- assert(VA.isRegLoc() && "CCValAssign must be RegLoc");
-
- // Determine which register class we need
- if (RegVT == MVT::i1) {
- TRC = PTX::RegPredRegisterClass;
- }
- else if (RegVT == MVT::i16) {
- TRC = PTX::RegI16RegisterClass;
- }
- else if (RegVT == MVT::i32) {
- TRC = PTX::RegI32RegisterClass;
- }
- else if (RegVT == MVT::i64) {
- TRC = PTX::RegI64RegisterClass;
- }
- else if (RegVT == MVT::f32) {
- TRC = PTX::RegF32RegisterClass;
- }
- else if (RegVT == MVT::f64) {
- TRC = PTX::RegF64RegisterClass;
- }
- else {
- llvm_unreachable("Unknown parameter type");
- }
+ for (unsigned i = 0, e = Ins.size(); i != e; ++i) {
+ EVT RegVT = Ins[i].VT;
+ TargetRegisterClass* TRC = getRegClassFor(RegVT);
+ // Use a unique index in the instruction to prevent instruction folding.
+ // Yes, this is a hack.
+ SDValue Index = DAG.getTargetConstant(i, MVT::i32);
unsigned Reg = MF.getRegInfo().createVirtualRegister(TRC);
- MF.getRegInfo().addLiveIn(VA.getLocReg(), Reg);
+ SDValue ArgValue = DAG.getNode(PTXISD::READ_PARAM, dl, RegVT, Chain,
+ Index);
- SDValue ArgValue = DAG.getCopyFromReg(Chain, dl, Reg, RegVT);
InVals.push_back(ArgValue);
- MFI->addArgReg(VA.getLocReg());
+ MFI->addArgReg(Reg);
}
}
@@ -301,41 +271,66 @@ SDValue PTXTargetLowering::
assert(Outs.size() == 0 && "Kernel must return void.");
return DAG.getNode(PTXISD::EXIT, dl, MVT::Other, Chain);
case CallingConv::PTX_Device:
- //assert(Outs.size() <= 1 && "Can at most return one value.");
+ assert(Outs.size() <= 1 && "Can at most return one value.");
break;
}
MachineFunction& MF = DAG.getMachineFunction();
PTXMachineFunctionInfo *MFI = MF.getInfo<PTXMachineFunctionInfo>();
+ PTXParamManager &PM = MFI->getParamManager();
SDValue Flag;
+ const PTXSubtarget& ST = getTargetMachine().getSubtarget<PTXSubtarget>();
- // Even though we could use the .param space for return arguments for
- // device functions if SM >= 2.0 and the number of return arguments is
- // only 1, we just always use registers since this makes the codegen
- // easier.
- SmallVector<CCValAssign, 16> RVLocs;
- CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(),
- getTargetMachine(), RVLocs, *DAG.getContext());
-
- CCInfo.AnalyzeReturn(Outs, RetCC_PTX);
-
- for (unsigned i = 0, e = RVLocs.size(); i != e; ++i) {
- CCValAssign& VA = RVLocs[i];
-
- assert(VA.isRegLoc() && "CCValAssign must be RegLoc");
+ if (ST.useParamSpaceForDeviceArgs()) {
+ assert(Outs.size() < 2 && "Device functions can return at most one value");
+
+ if (Outs.size() == 1) {
+ unsigned ParamSize = OutVals[0].getValueType().getSizeInBits();
+ unsigned Param = PM.addReturnParam(ParamSize);
+ const std::string &ParamName = PM.getParamName(Param);
+ SDValue ParamValue = DAG.getTargetExternalSymbol(ParamName.c_str(),
+ MVT::Other);
+ Chain = DAG.getNode(PTXISD::STORE_PARAM, dl, MVT::Other, Chain,
+ ParamValue, OutVals[0]);
+ }
+ } else {
+ for (unsigned i = 0, e = Outs.size(); i != e; ++i) {
+ EVT RegVT = Outs[i].VT;
+ TargetRegisterClass* TRC = 0;
- unsigned Reg = VA.getLocReg();
+ // Determine which register class we need
+ if (RegVT == MVT::i1) {
+ TRC = PTX::RegPredRegisterClass;
+ }
+ else if (RegVT == MVT::i16) {
+ TRC = PTX::RegI16RegisterClass;
+ }
+ else if (RegVT == MVT::i32) {
+ TRC = PTX::RegI32RegisterClass;
+ }
+ else if (RegVT == MVT::i64) {
+ TRC = PTX::RegI64RegisterClass;
+ }
+ else if (RegVT == MVT::f32) {
+ TRC = PTX::RegF32RegisterClass;
+ }
+ else if (RegVT == MVT::f64) {
+ TRC = PTX::RegF64RegisterClass;
+ }
+ else {
+ llvm_unreachable("Unknown parameter type");
+ }
- DAG.getMachineFunction().getRegInfo().addLiveOut(Reg);
+ unsigned Reg = MF.getRegInfo().createVirtualRegister(TRC);
- Chain = DAG.getCopyToReg(Chain, dl, Reg, OutVals[i], Flag);
+ SDValue Copy = DAG.getCopyToReg(Chain, dl, Reg, OutVals[i]/*, Flag*/);
+ SDValue OutReg = DAG.getRegister(Reg, RegVT);
- // Guarantee that all emitted copies are stuck together,
- // avoiding something bad
- Flag = Chain.getValue(1);
+ Chain = DAG.getNode(PTXISD::WRITE_PARAM, dl, MVT::Other, Copy, OutReg);
- MFI->addRetReg(Reg);
+ MFI->addRetReg(Reg);
+ }
}
if (Flag.getNode() == 0) {
@@ -345,3 +340,83 @@ SDValue PTXTargetLowering::
return DAG.getNode(PTXISD::RET, dl, MVT::Other, Chain, Flag);
}
}
+
+SDValue
+PTXTargetLowering::LowerCall(SDValue Chain, SDValue Callee,
+ CallingConv::ID CallConv, bool isVarArg,
+ bool &isTailCall,
+ const SmallVectorImpl<ISD::OutputArg> &Outs,
+ const SmallVectorImpl<SDValue> &OutVals,
+ const SmallVectorImpl<ISD::InputArg> &Ins,
+ DebugLoc dl, SelectionDAG &DAG,
+ SmallVectorImpl<SDValue> &InVals) const {
+
+ MachineFunction& MF = DAG.getMachineFunction();
+ PTXMachineFunctionInfo *MFI = MF.getInfo<PTXMachineFunctionInfo>();
+ PTXParamManager &PM = MFI->getParamManager();
+
+ assert(getTargetMachine().getSubtarget<PTXSubtarget>().callsAreHandled() &&
+ "Calls are not handled for the target device");
+
+ std::vector<SDValue> Ops;
+ // The layout of the ops will be [Chain, #Ins, Ins, Callee, #Outs, Outs]
+ Ops.resize(Outs.size() + Ins.size() + 4);
+
+ Ops[0] = Chain;
+
+ // Identify the callee function
+ const GlobalValue *GV = cast<GlobalAddressSDNode>(Callee)->getGlobal();
+ assert(cast<Function>(GV)->getCallingConv() == CallingConv::PTX_Device &&
+ "PTX function calls must be to PTX device functions");
+ Callee = DAG.getTargetGlobalAddress(GV, dl, getPointerTy());
+ Ops[Ins.size()+2] = Callee;
+
+ // Generate STORE_PARAM nodes for each function argument. In PTX, function
+ // arguments are explicitly stored into .param variables and passed as
+ // arguments. There is no register/stack-based calling convention in PTX.
+ Ops[Ins.size()+3] = DAG.getTargetConstant(OutVals.size(), MVT::i32);
+ for (unsigned i = 0; i != OutVals.size(); ++i) {
+ unsigned Size = OutVals[i].getValueType().getSizeInBits();
+ unsigned Param = PM.addLocalParam(Size);
+ const std::string &ParamName = PM.getParamName(Param);
+ SDValue ParamValue = DAG.getTargetExternalSymbol(ParamName.c_str(),
+ MVT::Other);
+ Chain = DAG.getNode(PTXISD::STORE_PARAM, dl, MVT::Other, Chain,
+ ParamValue, OutVals[i]);
+ Ops[i+Ins.size()+4] = ParamValue;
+ }
+
+ std::vector<SDValue> InParams;
+
+ // Generate list of .param variables to hold the return value(s).
+ Ops[1] = DAG.getTargetConstant(Ins.size(), MVT::i32);
+ for (unsigned i = 0; i < Ins.size(); ++i) {
+ unsigned Size = Ins[i].VT.getStoreSizeInBits();
+ unsigned Param = PM.addLocalParam(Size);
+ const std::string &ParamName = PM.getParamName(Param);
+ SDValue ParamValue = DAG.getTargetExternalSymbol(ParamName.c_str(),
+ MVT::Other);
+ Ops[i+2] = ParamValue;
+ InParams.push_back(ParamValue);
+ }
+
+ Ops[0] = Chain;
+
+ // Create the CALL node.
+ Chain = DAG.getNode(PTXISD::CALL, dl, MVT::Other, &Ops[0], Ops.size());
+
+ // Create the LOAD_PARAM nodes that retrieve the function return value(s).
+ for (unsigned i = 0; i < Ins.size(); ++i) {
+ SDValue Load = DAG.getNode(PTXISD::LOAD_PARAM, dl, Ins[i].VT, Chain,
+ InParams[i]);
+ InVals.push_back(Load);
+ }
+
+ return Chain;
+}
+
+unsigned PTXTargetLowering::getNumRegisters(LLVMContext &Context, EVT VT) {
+ // All arguments consist of one "register," regardless of the type.
+ return 1;
+}
+
diff --git a/contrib/llvm/lib/Target/PTX/PTXISelLowering.h b/contrib/llvm/lib/Target/PTX/PTXISelLowering.h
index 4318541..4d25665 100644
--- a/contrib/llvm/lib/Target/PTX/PTXISelLowering.h
+++ b/contrib/llvm/lib/Target/PTX/PTXISelLowering.h
@@ -26,9 +26,12 @@ namespace PTXISD {
FIRST_NUMBER = ISD::BUILTIN_OP_END,
LOAD_PARAM,
STORE_PARAM,
+ READ_PARAM,
+ WRITE_PARAM,
EXIT,
RET,
- COPY_ADDRESS
+ COPY_ADDRESS,
+ CALL
};
} // namespace PTXISD
@@ -60,7 +63,19 @@ class PTXTargetLowering : public TargetLowering {
DebugLoc dl,
SelectionDAG &DAG) const;
- virtual MVT::SimpleValueType getSetCCResultType(EVT VT) const;
+ virtual SDValue
+ LowerCall(SDValue Chain, SDValue Callee,
+ CallingConv::ID CallConv, bool isVarArg,
+ bool &isTailCall,
+ const SmallVectorImpl<ISD::OutputArg> &Outs,
+ const SmallVectorImpl<SDValue> &OutVals,
+ const SmallVectorImpl<ISD::InputArg> &Ins,
+ DebugLoc dl, SelectionDAG &DAG,
+ SmallVectorImpl<SDValue> &InVals) const;
+
+ virtual EVT getSetCCResultType(EVT VT) const;
+
+ virtual unsigned getNumRegisters(LLVMContext &Context, EVT VT);
private:
SDValue LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const;
diff --git a/contrib/llvm/lib/Target/PTX/PTXInstrFormats.td b/contrib/llvm/lib/Target/PTX/PTXInstrFormats.td
index 8cee351..397fdc3 100644
--- a/contrib/llvm/lib/Target/PTX/PTXInstrFormats.td
+++ b/contrib/llvm/lib/Target/PTX/PTXInstrFormats.td
@@ -7,12 +7,39 @@
//
//===----------------------------------------------------------------------===//
-// PTX Predicate operand, default to (0, 0) = (zero-reg, always).
+
+// Rounding Mode Specifier
+/*class RoundingMode<bits<3> val> {
+ bits<3> Value = val;
+}
+
+def RndDefault : RoundingMode<0>;
+def RndNearestEven : RoundingMode<1>;
+def RndNearestZero : RoundingMode<2>;
+def RndNegInf : RoundingMode<3>;
+def RndPosInf : RoundingMode<4>;
+def RndApprox : RoundingMode<5>;*/
+
+
+// Rounding Mode Operand
+def RndMode : Operand<i32> {
+ let PrintMethod = "printRoundingMode";
+}
+
+def RndDefault : PatLeaf<(i32 0)>;
+
+// PTX Predicate operand, default to (0, 0) = (zero-reg, none).
// Leave PrintMethod empty; predicate printing is defined elsewhere.
def pred : PredicateOperand<OtherVT, (ops RegPred, i32imm),
- (ops (i1 zero_reg), (i32 0))>;
+ (ops (i1 zero_reg), (i32 2))>;
+def RndModeOperand : Operand<OtherVT> {
+ let MIOperandInfo = (ops i32imm);
+}
+
+// Instruction Types
let Namespace = "PTX" in {
+
class InstPTX<dag oops, dag iops, string asmstr, list<dag> pattern>
: Instruction {
dag OutOperandList = oops;
diff --git a/contrib/llvm/lib/Target/PTX/PTXInstrInfo.cpp b/contrib/llvm/lib/Target/PTX/PTXInstrInfo.cpp
index 425265a..1b947a5 100644
--- a/contrib/llvm/lib/Target/PTX/PTXInstrInfo.cpp
+++ b/contrib/llvm/lib/Target/PTX/PTXInstrInfo.cpp
@@ -16,10 +16,11 @@
#include "PTX.h"
#include "PTXInstrInfo.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/CodeGen/SelectionDAGNodes.h"
-#include "llvm/Target/TargetRegistry.h"
#include "llvm/Support/Debug.h"
+#include "llvm/Support/TargetRegistry.h"
#include "llvm/Support/raw_ostream.h"
#define GET_INSTRINFO_CTOR
@@ -47,8 +48,13 @@ void PTXInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
MachineBasicBlock::iterator I, DebugLoc DL,
unsigned DstReg, unsigned SrcReg,
bool KillSrc) const {
- for (int i = 0, e = sizeof(map)/sizeof(map[0]); i != e; ++ i) {
- if (map[i].cls->contains(DstReg, SrcReg)) {
+
+ const MachineRegisterInfo& MRI = MBB.getParent()->getRegInfo();
+ //assert(MRI.getRegClass(SrcReg) == MRI.getRegClass(DstReg) &&
+ // "Invalid register copy between two register classes");
+
+ for (int i = 0, e = sizeof(map)/sizeof(map[0]); i != e; ++i) {
+ if (map[i].cls == MRI.getRegClass(DstReg)) {
const MCInstrDesc &MCID = get(map[i].opcode);
MachineInstr *MI = BuildMI(MBB, I, DL, MCID, DstReg).
addReg(SrcReg, getKillRegState(KillSrc));
@@ -161,7 +167,7 @@ DefinesPredicate(MachineInstr *MI,
return false;
Pred.push_back(MO);
- Pred.push_back(MachineOperand::CreateImm(PTX::PRED_NORMAL));
+ Pred.push_back(MachineOperand::CreateImm(PTXPredicate::None));
return true;
}
@@ -277,7 +283,7 @@ InsertBranch(MachineBasicBlock &MBB,
BuildMI(&MBB, DL, get(PTX::BRAdp))
.addMBB(TBB).addReg(Cond[0].getReg()).addImm(Cond[1].getImm());
BuildMI(&MBB, DL, get(PTX::BRAd))
- .addMBB(FBB).addReg(PTX::NoRegister).addImm(PTX::PRED_NORMAL);
+ .addMBB(FBB).addReg(PTX::NoRegister).addImm(PTXPredicate::None);
return 2;
} else if (Cond.size()) {
BuildMI(&MBB, DL, get(PTX::BRAdp))
@@ -285,7 +291,7 @@ InsertBranch(MachineBasicBlock &MBB,
return 1;
} else {
BuildMI(&MBB, DL, get(PTX::BRAd))
- .addMBB(TBB).addReg(PTX::NoRegister).addImm(PTX::PRED_NORMAL);
+ .addMBB(TBB).addReg(PTX::NoRegister).addImm(PTXPredicate::None);
return 1;
}
}
@@ -296,34 +302,7 @@ void PTXInstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB,
unsigned SrcReg, bool isKill, int FrameIdx,
const TargetRegisterClass *RC,
const TargetRegisterInfo *TRI) const {
- MachineInstr& MI = *MII;
- DebugLoc DL = MI.getDebugLoc();
-
- DEBUG(dbgs() << "storeRegToStackSlot: " << MI);
-
- int OpCode;
-
- // Select the appropriate opcode based on the register class
- if (RC == PTX::RegI16RegisterClass) {
- OpCode = PTX::STACKSTOREI16;
- } else if (RC == PTX::RegI32RegisterClass) {
- OpCode = PTX::STACKSTOREI32;
- } else if (RC == PTX::RegI64RegisterClass) {
- OpCode = PTX::STACKSTOREI32;
- } else if (RC == PTX::RegF32RegisterClass) {
- OpCode = PTX::STACKSTOREF32;
- } else if (RC == PTX::RegF64RegisterClass) {
- OpCode = PTX::STACKSTOREF64;
- } else {
- llvm_unreachable("Unknown PTX register class!");
- }
-
- // Build the store instruction (really a mov)
- MachineInstrBuilder MIB = BuildMI(MBB, MII, DL, get(OpCode));
- MIB.addFrameIndex(FrameIdx);
- MIB.addReg(SrcReg);
-
- AddDefaultPredicate(MIB);
+ assert(false && "storeRegToStackSlot should not be called for PTX");
}
void PTXInstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB,
@@ -331,34 +310,7 @@ void PTXInstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB,
unsigned DestReg, int FrameIdx,
const TargetRegisterClass *RC,
const TargetRegisterInfo *TRI) const {
- MachineInstr& MI = *MII;
- DebugLoc DL = MI.getDebugLoc();
-
- DEBUG(dbgs() << "loadRegToStackSlot: " << MI);
-
- int OpCode;
-
- // Select the appropriate opcode based on the register class
- if (RC == PTX::RegI16RegisterClass) {
- OpCode = PTX::STACKLOADI16;
- } else if (RC == PTX::RegI32RegisterClass) {
- OpCode = PTX::STACKLOADI32;
- } else if (RC == PTX::RegI64RegisterClass) {
- OpCode = PTX::STACKLOADI32;
- } else if (RC == PTX::RegF32RegisterClass) {
- OpCode = PTX::STACKLOADF32;
- } else if (RC == PTX::RegF64RegisterClass) {
- OpCode = PTX::STACKLOADF64;
- } else {
- llvm_unreachable("Unknown PTX register class!");
- }
-
- // Build the load instruction (really a mov)
- MachineInstrBuilder MIB = BuildMI(MBB, MII, DL, get(OpCode));
- MIB.addReg(DestReg);
- MIB.addFrameIndex(FrameIdx);
-
- AddDefaultPredicate(MIB);
+ assert(false && "loadRegFromStackSlot should not be called for PTX");
}
// static helper routines
@@ -367,7 +319,7 @@ MachineSDNode *PTXInstrInfo::
GetPTXMachineNode(SelectionDAG *DAG, unsigned Opcode,
DebugLoc dl, EVT VT, SDValue Op1) {
SDValue predReg = DAG->getRegister(PTX::NoRegister, MVT::i1);
- SDValue predOp = DAG->getTargetConstant(PTX::PRED_NORMAL, MVT::i32);
+ SDValue predOp = DAG->getTargetConstant(PTXPredicate::None, MVT::i32);
SDValue ops[] = { Op1, predReg, predOp };
return DAG->getMachineNode(Opcode, dl, VT, ops, array_lengthof(ops));
}
@@ -376,7 +328,7 @@ MachineSDNode *PTXInstrInfo::
GetPTXMachineNode(SelectionDAG *DAG, unsigned Opcode,
DebugLoc dl, EVT VT, SDValue Op1, SDValue Op2) {
SDValue predReg = DAG->getRegister(PTX::NoRegister, MVT::i1);
- SDValue predOp = DAG->getTargetConstant(PTX::PRED_NORMAL, MVT::i32);
+ SDValue predOp = DAG->getTargetConstant(PTXPredicate::None, MVT::i32);
SDValue ops[] = { Op1, Op2, predReg, predOp };
return DAG->getMachineNode(Opcode, dl, VT, ops, array_lengthof(ops));
}
@@ -384,7 +336,7 @@ GetPTXMachineNode(SelectionDAG *DAG, unsigned Opcode,
void PTXInstrInfo::AddDefaultPredicate(MachineInstr *MI) {
if (MI->findFirstPredOperandIdx() == -1) {
MI->addOperand(MachineOperand::CreateReg(PTX::NoRegister, /*IsDef=*/false));
- MI->addOperand(MachineOperand::CreateImm(PTX::PRED_NORMAL));
+ MI->addOperand(MachineOperand::CreateImm(PTXPredicate::None));
}
}
diff --git a/contrib/llvm/lib/Target/PTX/PTXInstrInfo.td b/contrib/llvm/lib/Target/PTX/PTXInstrInfo.td
index 6bfe906..a3fcea9 100644
--- a/contrib/llvm/lib/Target/PTX/PTXInstrInfo.td
+++ b/contrib/llvm/lib/Target/PTX/PTXInstrInfo.td
@@ -21,10 +21,6 @@ include "PTXInstrFormats.td"
// Code Generation Predicates
//===----------------------------------------------------------------------===//
-// Addressing
-def Use32BitAddresses : Predicate<"!getSubtarget().is64Bit()">;
-def Use64BitAddresses : Predicate<"getSubtarget().is64Bit()">;
-
// Shader Model Support
def FDivNeedsRoundingMode : Predicate<"getSubtarget().fdivNeedsRoundingMode()">;
def FDivNoRoundingMode : Predicate<"!getSubtarget().fdivNeedsRoundingMode()">;
@@ -43,130 +39,19 @@ def DoesNotSupportPTX23 : Predicate<"!getSubtarget().supportsPTX23()">;
def SupportsFMA : Predicate<"getSubtarget().supportsFMA()">;
def DoesNotSupportFMA : Predicate<"!getSubtarget().supportsFMA()">;
-//===----------------------------------------------------------------------===//
-// Instruction Pattern Stuff
-//===----------------------------------------------------------------------===//
-def load_global : PatFrag<(ops node:$ptr), (load node:$ptr), [{
- const Value *Src;
- const PointerType *PT;
- if ((Src = cast<LoadSDNode>(N)->getSrcValue()) &&
- (PT = dyn_cast<PointerType>(Src->getType())))
- return PT->getAddressSpace() == PTX::GLOBAL;
- return false;
-}]>;
-
-def load_constant : PatFrag<(ops node:$ptr), (load node:$ptr), [{
- const Value *Src;
- const PointerType *PT;
- if ((Src = cast<LoadSDNode>(N)->getSrcValue()) &&
- (PT = dyn_cast<PointerType>(Src->getType())))
- return PT->getAddressSpace() == PTX::CONSTANT;
- return false;
-}]>;
-
-def load_local : PatFrag<(ops node:$ptr), (load node:$ptr), [{
- const Value *Src;
- const PointerType *PT;
- if ((Src = cast<LoadSDNode>(N)->getSrcValue()) &&
- (PT = dyn_cast<PointerType>(Src->getType())))
- return PT->getAddressSpace() == PTX::LOCAL;
- return false;
-}]>;
-
-def load_parameter : PatFrag<(ops node:$ptr), (load node:$ptr), [{
- const Value *Src;
- const PointerType *PT;
- if ((Src = cast<LoadSDNode>(N)->getSrcValue()) &&
- (PT = dyn_cast<PointerType>(Src->getType())))
- return PT->getAddressSpace() == PTX::PARAMETER;
- return false;
-}]>;
-
-def load_shared : PatFrag<(ops node:$ptr), (load node:$ptr), [{
- const Value *Src;
- const PointerType *PT;
- if ((Src = cast<LoadSDNode>(N)->getSrcValue()) &&
- (PT = dyn_cast<PointerType>(Src->getType())))
- return PT->getAddressSpace() == PTX::SHARED;
- return false;
-}]>;
-
-def store_global
- : PatFrag<(ops node:$d, node:$ptr), (store node:$d, node:$ptr), [{
- const Value *Src;
- const PointerType *PT;
- if ((Src = cast<StoreSDNode>(N)->getSrcValue()) &&
- (PT = dyn_cast<PointerType>(Src->getType())))
- return PT->getAddressSpace() == PTX::GLOBAL;
- return false;
-}]>;
-
-def store_local
- : PatFrag<(ops node:$d, node:$ptr), (store node:$d, node:$ptr), [{
- const Value *Src;
- const PointerType *PT;
- if ((Src = cast<StoreSDNode>(N)->getSrcValue()) &&
- (PT = dyn_cast<PointerType>(Src->getType())))
- return PT->getAddressSpace() == PTX::LOCAL;
- return false;
-}]>;
-
-def store_parameter
- : PatFrag<(ops node:$d, node:$ptr), (store node:$d, node:$ptr), [{
- const Value *Src;
- const PointerType *PT;
- if ((Src = cast<StoreSDNode>(N)->getSrcValue()) &&
- (PT = dyn_cast<PointerType>(Src->getType())))
- return PT->getAddressSpace() == PTX::PARAMETER;
- return false;
-}]>;
-
-def store_shared
- : PatFrag<(ops node:$d, node:$ptr), (store node:$d, node:$ptr), [{
- const Value *Src;
- const PointerType *PT;
- if ((Src = cast<StoreSDNode>(N)->getSrcValue()) &&
- (PT = dyn_cast<PointerType>(Src->getType())))
- return PT->getAddressSpace() == PTX::SHARED;
- return false;
-}]>;
-
-// Addressing modes.
-def ADDRrr32 : ComplexPattern<i32, 2, "SelectADDRrr", [], []>;
-def ADDRrr64 : ComplexPattern<i64, 2, "SelectADDRrr", [], []>;
-def ADDRri32 : ComplexPattern<i32, 2, "SelectADDRri", [], []>;
-def ADDRri64 : ComplexPattern<i64, 2, "SelectADDRri", [], []>;
-def ADDRii32 : ComplexPattern<i32, 2, "SelectADDRii", [], []>;
-def ADDRii64 : ComplexPattern<i64, 2, "SelectADDRii", [], []>;
-
-// Address operands
-def MEMri32 : Operand<i32> {
- let PrintMethod = "printMemOperand";
- let MIOperandInfo = (ops RegI32, i32imm);
-}
-def MEMri64 : Operand<i64> {
- let PrintMethod = "printMemOperand";
- let MIOperandInfo = (ops RegI64, i64imm);
-}
-def MEMii32 : Operand<i32> {
- let PrintMethod = "printMemOperand";
- let MIOperandInfo = (ops i32imm, i32imm);
-}
-def MEMii64 : Operand<i64> {
- let PrintMethod = "printMemOperand";
- let MIOperandInfo = (ops i64imm, i64imm);
-}
-// The operand here does not correspond to an actual address, so we
-// can use i32 in 64-bit address modes.
-def MEMpi : Operand<i32> {
- let PrintMethod = "printParamOperand";
- let MIOperandInfo = (ops i32imm);
-}
-def MEMret : Operand<i32> {
- let PrintMethod = "printReturnOperand";
- let MIOperandInfo = (ops i32imm);
-}
+
+// def SDT_PTXCallSeqStart : SDCallSeqStart<[SDTCisVT<0, i32>]>;
+// def SDT_PTXCallSeqEnd : SDCallSeqEnd<[SDTCisVT<0, i32>, SDTCisVT<1, i32>]>;
+
+// def PTXcallseq_start : SDNode<"ISD::CALLSEQ_START", SDT_PTXCallSeqStart,
+// [SDNPHasChain, SDNPOutGlue]>;
+// def PTXcallseq_end : SDNode<"ISD::CALLSEQ_END", SDT_PTXCallSeqEnd,
+// [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue]>;
+
+def PTXcall : SDNode<"PTXISD::CALL", SDTNone,
+ [SDNPHasChain, SDNPVariadic, SDNPOptInGlue, SDNPOutGlue]>;
+
// Branch & call targets have OtherVT type.
def brtarget : Operand<OtherVT>;
@@ -189,87 +74,73 @@ def PTXret
def PTXcopyaddress
: SDNode<"PTXISD::COPY_ADDRESS", SDTypeProfile<1, 1, []>, []>;
-// Load/store .param space
-def PTXloadparam
- : SDNode<"PTXISD::LOAD_PARAM", SDTypeProfile<1, 1, [SDTCisVT<1, i32>]>,
- [SDNPHasChain, SDNPOutGlue, SDNPOptInGlue]>;
-def PTXstoreparam
- : SDNode<"PTXISD::STORE_PARAM", SDTypeProfile<0, 2, [SDTCisVT<0, i32>]>,
- [SDNPHasChain, SDNPOutGlue, SDNPOptInGlue]>;
+
//===----------------------------------------------------------------------===//
// Instruction Class Templates
//===----------------------------------------------------------------------===//
+// For floating-point instructions, we cannot just embed the pattern into the
+// instruction definition since we need to muck around with the rounding mode,
+// and I do not know how to insert constants into instructions directly from
+// pattern matches.
+
//===- Floating-Point Instructions - 2 Operand Form -----------------------===//
-multiclass PTX_FLOAT_2OP<string opcstr, SDNode opnode> {
+multiclass PTX_FLOAT_2OP<string opcstr> {
def rr32 : InstPTX<(outs RegF32:$d),
- (ins RegF32:$a),
- !strconcat(opcstr, ".f32\t$d, $a"),
- [(set RegF32:$d, (opnode RegF32:$a))]>;
+ (ins RndMode:$r, RegF32:$a),
+ !strconcat(opcstr, "$r.f32\t$d, $a"), []>;
def ri32 : InstPTX<(outs RegF32:$d),
- (ins f32imm:$a),
- !strconcat(opcstr, ".f32\t$d, $a"),
- [(set RegF32:$d, (opnode fpimm:$a))]>;
+ (ins RndMode:$r, f32imm:$a),
+ !strconcat(opcstr, "$r.f32\t$d, $a"), []>;
def rr64 : InstPTX<(outs RegF64:$d),
- (ins RegF64:$a),
- !strconcat(opcstr, ".f64\t$d, $a"),
- [(set RegF64:$d, (opnode RegF64:$a))]>;
+ (ins RndMode:$r, RegF64:$a),
+ !strconcat(opcstr, "$r.f64\t$d, $a"), []>;
def ri64 : InstPTX<(outs RegF64:$d),
- (ins f64imm:$a),
- !strconcat(opcstr, ".f64\t$d, $a"),
- [(set RegF64:$d, (opnode fpimm:$a))]>;
+ (ins RndMode:$r, f64imm:$a),
+ !strconcat(opcstr, "$r.f64\t$d, $a"), []>;
}
//===- Floating-Point Instructions - 3 Operand Form -----------------------===//
-multiclass PTX_FLOAT_3OP<string opcstr, SDNode opnode> {
+multiclass PTX_FLOAT_3OP<string opcstr> {
def rr32 : InstPTX<(outs RegF32:$d),
- (ins RegF32:$a, RegF32:$b),
- !strconcat(opcstr, ".f32\t$d, $a, $b"),
- [(set RegF32:$d, (opnode RegF32:$a, RegF32:$b))]>;
+ (ins RndMode:$r, RegF32:$a, RegF32:$b),
+ !strconcat(opcstr, "$r.f32\t$d, $a, $b"), []>;
def ri32 : InstPTX<(outs RegF32:$d),
- (ins RegF32:$a, f32imm:$b),
- !strconcat(opcstr, ".f32\t$d, $a, $b"),
- [(set RegF32:$d, (opnode RegF32:$a, fpimm:$b))]>;
+ (ins RndMode:$r, RegF32:$a, f32imm:$b),
+ !strconcat(opcstr, "$r.f32\t$d, $a, $b"), []>;
def rr64 : InstPTX<(outs RegF64:$d),
- (ins RegF64:$a, RegF64:$b),
- !strconcat(opcstr, ".f64\t$d, $a, $b"),
- [(set RegF64:$d, (opnode RegF64:$a, RegF64:$b))]>;
+ (ins RndMode:$r, RegF64:$a, RegF64:$b),
+ !strconcat(opcstr, "$r.f64\t$d, $a, $b"), []>;
def ri64 : InstPTX<(outs RegF64:$d),
- (ins RegF64:$a, f64imm:$b),
- !strconcat(opcstr, ".f64\t$d, $a, $b"),
- [(set RegF64:$d, (opnode RegF64:$a, fpimm:$b))]>;
+ (ins RndMode:$r, RegF64:$a, f64imm:$b),
+ !strconcat(opcstr, "$r.f64\t$d, $a, $b"), []>;
}
//===- Floating-Point Instructions - 4 Operand Form -----------------------===//
-multiclass PTX_FLOAT_4OP<string opcstr, SDNode opnode1, SDNode opnode2> {
+multiclass PTX_FLOAT_4OP<string opcstr> {
def rrr32 : InstPTX<(outs RegF32:$d),
- (ins RegF32:$a, RegF32:$b, RegF32:$c),
- !strconcat(opcstr, ".f32\t$d, $a, $b, $c"),
- [(set RegF32:$d, (opnode2 (opnode1 RegF32:$a,
- RegF32:$b),
- RegF32:$c))]>;
+ (ins RndMode:$r, RegF32:$a, RegF32:$b, RegF32:$c),
+ !strconcat(opcstr, "$r.f32\t$d, $a, $b, $c"), []>;
def rri32 : InstPTX<(outs RegF32:$d),
- (ins RegF32:$a, RegF32:$b, f32imm:$c),
- !strconcat(opcstr, ".f32\t$d, $a, $b, $c"),
- [(set RegF32:$d, (opnode2 (opnode1 RegF32:$a,
- RegF32:$b),
- fpimm:$c))]>;
+ (ins RndMode:$r, RegF32:$a, RegF32:$b, f32imm:$c),
+ !strconcat(opcstr, "$r.f32\t$d, $a, $b, $c"), []>;
+ def rii32 : InstPTX<(outs RegF32:$d),
+ (ins RndMode:$r, RegF32:$a, f32imm:$b, f32imm:$c),
+ !strconcat(opcstr, "$r.f32\t$d, $a, $b, $c"), []>;
def rrr64 : InstPTX<(outs RegF64:$d),
- (ins RegF64:$a, RegF64:$b, RegF64:$c),
- !strconcat(opcstr, ".f64\t$d, $a, $b, $c"),
- [(set RegF64:$d, (opnode2 (opnode1 RegF64:$a,
- RegF64:$b),
- RegF64:$c))]>;
+ (ins RndMode:$r, RegF64:$a, RegF64:$b, RegF64:$c),
+ !strconcat(opcstr, "$r.f64\t$d, $a, $b, $c"), []>;
def rri64 : InstPTX<(outs RegF64:$d),
- (ins RegF64:$a, RegF64:$b, f64imm:$c),
- !strconcat(opcstr, ".f64\t$d, $a, $b, $c"),
- [(set RegF64:$d, (opnode2 (opnode1 RegF64:$a,
- RegF64:$b),
- fpimm:$c))]>;
+ (ins RndMode:$r, RegF64:$a, RegF64:$b, f64imm:$c),
+ !strconcat(opcstr, "$r.f64\t$d, $a, $b, $c"), []>;
+ def rii64 : InstPTX<(outs RegF64:$d),
+ (ins RndMode:$r, RegF64:$a, f64imm:$b, f64imm:$c),
+ !strconcat(opcstr, "$r.f64\t$d, $a, $b, $c"), []>;
}
-multiclass INT3<string opcstr, SDNode opnode> {
+//===- Integer Instructions - 3 Operand Form ------------------------------===//
+multiclass PTX_INT3<string opcstr, SDNode opnode> {
def rr16 : InstPTX<(outs RegI16:$d),
(ins RegI16:$a, RegI16:$b),
!strconcat(opcstr, ".u16\t$d, $a, $b"),
@@ -296,6 +167,35 @@ multiclass INT3<string opcstr, SDNode opnode> {
[(set RegI64:$d, (opnode RegI64:$a, imm:$b))]>;
}
+//===- Integer Instructions - 3 Operand Form (Signed) ---------------------===//
+multiclass PTX_INT3_SIGNED<string opcstr, SDNode opnode> {
+ def rr16 : InstPTX<(outs RegI16:$d),
+ (ins RegI16:$a, RegI16:$b),
+ !strconcat(opcstr, ".s16\t$d, $a, $b"),
+ [(set RegI16:$d, (opnode RegI16:$a, RegI16:$b))]>;
+ def ri16 : InstPTX<(outs RegI16:$d),
+ (ins RegI16:$a, i16imm:$b),
+ !strconcat(opcstr, ".s16\t$d, $a, $b"),
+ [(set RegI16:$d, (opnode RegI16:$a, imm:$b))]>;
+ def rr32 : InstPTX<(outs RegI32:$d),
+ (ins RegI32:$a, RegI32:$b),
+ !strconcat(opcstr, ".s32\t$d, $a, $b"),
+ [(set RegI32:$d, (opnode RegI32:$a, RegI32:$b))]>;
+ def ri32 : InstPTX<(outs RegI32:$d),
+ (ins RegI32:$a, i32imm:$b),
+ !strconcat(opcstr, ".s32\t$d, $a, $b"),
+ [(set RegI32:$d, (opnode RegI32:$a, imm:$b))]>;
+ def rr64 : InstPTX<(outs RegI64:$d),
+ (ins RegI64:$a, RegI64:$b),
+ !strconcat(opcstr, ".s64\t$d, $a, $b"),
+ [(set RegI64:$d, (opnode RegI64:$a, RegI64:$b))]>;
+ def ri64 : InstPTX<(outs RegI64:$d),
+ (ins RegI64:$a, i64imm:$b),
+ !strconcat(opcstr, ".s64\t$d, $a, $b"),
+ [(set RegI64:$d, (opnode RegI64:$a, imm:$b))]>;
+}
+
+//===- Bitwise Logic Instructions - 3 Operand Form ------------------------===//
multiclass PTX_LOGIC<string opcstr, SDNode opnode> {
def ripreds : InstPTX<(outs RegPred:$d),
(ins RegPred:$a, i1imm:$b),
@@ -331,7 +231,8 @@ multiclass PTX_LOGIC<string opcstr, SDNode opnode> {
[(set RegI64:$d, (opnode RegI64:$a, imm:$b))]>;
}
-multiclass INT3ntnc<string opcstr, SDNode opnode> {
+//===- Integer Shift Instructions - 3 Operand Form ------------------------===//
+multiclass PTX_INT3ntnc<string opcstr, SDNode opnode> {
def rr16 : InstPTX<(outs RegI16:$d),
(ins RegI16:$a, RegI16:$b),
!strconcat(opcstr, "16\t$d, $a, $b"),
@@ -370,6 +271,7 @@ multiclass INT3ntnc<string opcstr, SDNode opnode> {
[(set RegI64:$d, (opnode imm:$a, RegI64:$b))]>;
}
+//===- Set Predicate Instructions (Int) - 3/4 Operand Forms ---------------===//
multiclass PTX_SETP_I<RegisterClass RC, string regclsname, Operand immcls,
CondCode cmp, string cmpstr> {
// TODO support 5-operand format: p|q, a, b, c
@@ -385,56 +287,77 @@ multiclass PTX_SETP_I<RegisterClass RC, string regclsname, Operand immcls,
def rr_and_r
: InstPTX<(outs RegPred:$p), (ins RC:$a, RC:$b, RegPred:$c),
- !strconcat("setp.", cmpstr, ".and.", regclsname, "\t$p, $a, $b, $c"),
+ !strconcat("setp.", cmpstr, ".and.", regclsname,
+ "\t$p, $a, $b, $c"),
[(set RegPred:$p, (and (setcc RC:$a, RC:$b, cmp), RegPred:$c))]>;
def ri_and_r
: InstPTX<(outs RegPred:$p), (ins RC:$a, immcls:$b, RegPred:$c),
- !strconcat("setp.", cmpstr, ".and.", regclsname, "\t$p, $a, $b, $c"),
- [(set RegPred:$p, (and (setcc RC:$a, imm:$b, cmp), RegPred:$c))]>;
+ !strconcat("setp.", cmpstr, ".and.", regclsname,
+ "\t$p, $a, $b, $c"),
+ [(set RegPred:$p, (and (setcc RC:$a, imm:$b, cmp),
+ RegPred:$c))]>;
def rr_or_r
: InstPTX<(outs RegPred:$p), (ins RC:$a, RC:$b, RegPred:$c),
- !strconcat("setp.", cmpstr, ".or.", regclsname, "\t$p, $a, $b, $c"),
+ !strconcat("setp.", cmpstr, ".or.", regclsname,
+ "\t$p, $a, $b, $c"),
[(set RegPred:$p, (or (setcc RC:$a, RC:$b, cmp), RegPred:$c))]>;
def ri_or_r
: InstPTX<(outs RegPred:$p), (ins RC:$a, immcls:$b, RegPred:$c),
- !strconcat("setp.", cmpstr, ".or.", regclsname, "\t$p, $a, $b, $c"),
+ !strconcat("setp.", cmpstr, ".or.", regclsname,
+ "\t$p, $a, $b, $c"),
[(set RegPred:$p, (or (setcc RC:$a, imm:$b, cmp), RegPred:$c))]>;
def rr_xor_r
: InstPTX<(outs RegPred:$p), (ins RC:$a, RC:$b, RegPred:$c),
- !strconcat("setp.", cmpstr, ".xor.", regclsname, "\t$p, $a, $b, $c"),
+ !strconcat("setp.", cmpstr, ".xor.", regclsname,
+ "\t$p, $a, $b, $c"),
[(set RegPred:$p, (xor (setcc RC:$a, RC:$b, cmp), RegPred:$c))]>;
def ri_xor_r
: InstPTX<(outs RegPred:$p), (ins RC:$a, immcls:$b, RegPred:$c),
- !strconcat("setp.", cmpstr, ".xor.", regclsname, "\t$p, $a, $b, $c"),
- [(set RegPred:$p, (xor (setcc RC:$a, imm:$b, cmp), RegPred:$c))]>;
+ !strconcat("setp.", cmpstr, ".xor.", regclsname,
+ "\t$p, $a, $b, $c"),
+ [(set RegPred:$p, (xor (setcc RC:$a, imm:$b, cmp),
+ RegPred:$c))]>;
def rr_and_not_r
: InstPTX<(outs RegPred:$p), (ins RC:$a, RC:$b, RegPred:$c),
- !strconcat("setp.", cmpstr, ".and.", regclsname, "\t$p, $a, $b, !$c"),
- [(set RegPred:$p, (and (setcc RC:$a, RC:$b, cmp), (not RegPred:$c)))]>;
+ !strconcat("setp.", cmpstr, ".and.", regclsname,
+ "\t$p, $a, $b, !$c"),
+ [(set RegPred:$p, (and (setcc RC:$a, RC:$b, cmp),
+ (not RegPred:$c)))]>;
def ri_and_not_r
: InstPTX<(outs RegPred:$p), (ins RC:$a, immcls:$b, RegPred:$c),
- !strconcat("setp.", cmpstr, ".and.", regclsname, "\t$p, $a, $b, !$c"),
- [(set RegPred:$p, (and (setcc RC:$a, imm:$b, cmp), (not RegPred:$c)))]>;
+ !strconcat("setp.", cmpstr, ".and.", regclsname,
+ "\t$p, $a, $b, !$c"),
+ [(set RegPred:$p, (and (setcc RC:$a, imm:$b, cmp),
+ (not RegPred:$c)))]>;
def rr_or_not_r
: InstPTX<(outs RegPred:$p), (ins RC:$a, RC:$b, RegPred:$c),
- !strconcat("setp.", cmpstr, ".or.", regclsname, "\t$p, $a, $b, !$c"),
- [(set RegPred:$p, (or (setcc RC:$a, RC:$b, cmp), (not RegPred:$c)))]>;
+ !strconcat("setp.", cmpstr, ".or.", regclsname,
+ "\t$p, $a, $b, !$c"),
+ [(set RegPred:$p, (or (setcc RC:$a, RC:$b, cmp),
+ (not RegPred:$c)))]>;
def ri_or_not_r
: InstPTX<(outs RegPred:$p), (ins RC:$a, immcls:$b, RegPred:$c),
- !strconcat("setp.", cmpstr, ".or.", regclsname, "\t$p, $a, $b, !$c"),
- [(set RegPred:$p, (or (setcc RC:$a, imm:$b, cmp), (not RegPred:$c)))]>;
+ !strconcat("setp.", cmpstr, ".or.", regclsname,
+ "\t$p, $a, $b, !$c"),
+ [(set RegPred:$p, (or (setcc RC:$a, imm:$b, cmp),
+ (not RegPred:$c)))]>;
def rr_xor_not_r
: InstPTX<(outs RegPred:$p), (ins RC:$a, RC:$b, RegPred:$c),
- !strconcat("setp.", cmpstr, ".xor.", regclsname, "\t$p, $a, $b, !$c"),
- [(set RegPred:$p, (xor (setcc RC:$a, RC:$b, cmp), (not RegPred:$c)))]>;
+ !strconcat("setp.", cmpstr, ".xor.", regclsname,
+ "\t$p, $a, $b, !$c"),
+ [(set RegPred:$p, (xor (setcc RC:$a, RC:$b, cmp),
+ (not RegPred:$c)))]>;
def ri_xor_not_r
: InstPTX<(outs RegPred:$p), (ins RC:$a, immcls:$b, RegPred:$c),
- !strconcat("setp.", cmpstr, ".xor.", regclsname, "\t$p, $a, $b, !$c"),
- [(set RegPred:$p, (xor (setcc RC:$a, imm:$b, cmp), (not RegPred:$c)))]>;
+ !strconcat("setp.", cmpstr, ".xor.", regclsname,
+ "\t$p, $a, $b, !$c"),
+ [(set RegPred:$p, (xor (setcc RC:$a, imm:$b, cmp),
+ (not RegPred:$c)))]>;
}
-multiclass PTX_SETP_FP<RegisterClass RC, string regclsname,
+//===- Set Predicate Instructions (FP) - 3/4 Operand Form -----------------===//
+multiclass PTX_SETP_FP<RegisterClass RC, string regclsname, Operand immcls,
CondCode ucmp, CondCode ocmp, string cmpstr> {
// TODO support 5-operand format: p|q, a, b, c
@@ -447,137 +370,110 @@ multiclass PTX_SETP_FP<RegisterClass RC, string regclsname,
!strconcat("setp.", cmpstr, ".", regclsname, "\t$p, $a, $b"),
[(set RegPred:$p, (setcc RC:$a, RC:$b, ocmp))]>;
+ def ri_u
+ : InstPTX<(outs RegPred:$p), (ins RC:$a, immcls:$b),
+ !strconcat("setp.", cmpstr, "u.", regclsname, "\t$p, $a, $b"),
+ [(set RegPred:$p, (setcc RC:$a, fpimm:$b, ucmp))]>;
+ def ri_o
+ : InstPTX<(outs RegPred:$p), (ins RC:$a, immcls:$b),
+ !strconcat("setp.", cmpstr, ".", regclsname, "\t$p, $a, $b"),
+ [(set RegPred:$p, (setcc RC:$a, fpimm:$b, ocmp))]>;
+
def rr_and_r_u
: InstPTX<(outs RegPred:$p), (ins RC:$a, RC:$b, RegPred:$c),
- !strconcat("setp.", cmpstr, "u.and.", regclsname, "\t$p, $a, $b, $c"),
- [(set RegPred:$p, (and (setcc RC:$a, RC:$b, ucmp), RegPred:$c))]>;
+ !strconcat("setp.", cmpstr, "u.and.", regclsname,
+ "\t$p, $a, $b, $c"),
+ [(set RegPred:$p, (and (setcc RC:$a, RC:$b, ucmp),
+ RegPred:$c))]>;
def rr_and_r_o
: InstPTX<(outs RegPred:$p), (ins RC:$a, RC:$b, RegPred:$c),
- !strconcat("setp.", cmpstr, ".and.", regclsname, "\t$p, $a, $b, $c"),
- [(set RegPred:$p, (and (setcc RC:$a, RC:$b, ocmp), RegPred:$c))]>;
+ !strconcat("setp.", cmpstr, ".and.", regclsname,
+ "\t$p, $a, $b, $c"),
+ [(set RegPred:$p, (and (setcc RC:$a, RC:$b, ocmp),
+ RegPred:$c))]>;
def rr_or_r_u
: InstPTX<(outs RegPred:$p), (ins RC:$a, RC:$b, RegPred:$c),
- !strconcat("setp.", cmpstr, "u.or.", regclsname, "\t$p, $a, $b, $c"),
+ !strconcat("setp.", cmpstr, "u.or.", regclsname,
+ "\t$p, $a, $b, $c"),
[(set RegPred:$p, (or (setcc RC:$a, RC:$b, ucmp), RegPred:$c))]>;
def rr_or_r_o
: InstPTX<(outs RegPred:$p), (ins RC:$a, RC:$b, RegPred:$c),
- !strconcat("setp.", cmpstr, ".or.", regclsname, "\t$p, $a, $b, $c"),
+ !strconcat("setp.", cmpstr, ".or.", regclsname,
+ "\t$p, $a, $b, $c"),
[(set RegPred:$p, (or (setcc RC:$a, RC:$b, ocmp), RegPred:$c))]>;
def rr_xor_r_u
: InstPTX<(outs RegPred:$p), (ins RC:$a, RC:$b, RegPred:$c),
- !strconcat("setp.", cmpstr, "u.xor.", regclsname, "\t$p, $a, $b, $c"),
- [(set RegPred:$p, (xor (setcc RC:$a, RC:$b, ucmp), RegPred:$c))]>;
+ !strconcat("setp.", cmpstr, "u.xor.", regclsname,
+ "\t$p, $a, $b, $c"),
+ [(set RegPred:$p, (xor (setcc RC:$a, RC:$b, ucmp),
+ RegPred:$c))]>;
def rr_xor_r_o
: InstPTX<(outs RegPred:$p), (ins RC:$a, RC:$b, RegPred:$c),
- !strconcat("setp.", cmpstr, ".xor.", regclsname, "\t$p, $a, $b, $c"),
- [(set RegPred:$p, (xor (setcc RC:$a, RC:$b, ocmp), RegPred:$c))]>;
+ !strconcat("setp.", cmpstr, ".xor.", regclsname,
+ "\t$p, $a, $b, $c"),
+ [(set RegPred:$p, (xor (setcc RC:$a, RC:$b, ocmp),
+ RegPred:$c))]>;
def rr_and_not_r_u
: InstPTX<(outs RegPred:$p), (ins RC:$a, RC:$b, RegPred:$c),
- !strconcat("setp.", cmpstr, "u.and.", regclsname, "\t$p, $a, $b, !$c"),
- [(set RegPred:$p, (and (setcc RC:$a, RC:$b, ucmp), (not RegPred:$c)))]>;
+ !strconcat("setp.", cmpstr, "u.and.", regclsname,
+ "\t$p, $a, $b, !$c"),
+ [(set RegPred:$p, (and (setcc RC:$a, RC:$b, ucmp),
+ (not RegPred:$c)))]>;
def rr_and_not_r_o
: InstPTX<(outs RegPred:$p), (ins RC:$a, RC:$b, RegPred:$c),
- !strconcat("setp.", cmpstr, ".and.", regclsname, "\t$p, $a, $b, !$c"),
- [(set RegPred:$p, (and (setcc RC:$a, RC:$b, ocmp), (not RegPred:$c)))]>;
+ !strconcat("setp.", cmpstr, ".and.", regclsname,
+ "\t$p, $a, $b, !$c"),
+ [(set RegPred:$p, (and (setcc RC:$a, RC:$b, ocmp),
+ (not RegPred:$c)))]>;
def rr_or_not_r_u
: InstPTX<(outs RegPred:$p), (ins RC:$a, RC:$b, RegPred:$c),
- !strconcat("setp.", cmpstr, "u.or.", regclsname, "\t$p, $a, $b, !$c"),
- [(set RegPred:$p, (or (setcc RC:$a, RC:$b, ucmp), (not RegPred:$c)))]>;
+ !strconcat("setp.", cmpstr, "u.or.", regclsname,
+ "\t$p, $a, $b, !$c"),
+ [(set RegPred:$p, (or (setcc RC:$a, RC:$b, ucmp),
+ (not RegPred:$c)))]>;
def rr_or_not_r_o
: InstPTX<(outs RegPred:$p), (ins RC:$a, RC:$b, RegPred:$c),
- !strconcat("setp.", cmpstr, ".or.", regclsname, "\t$p, $a, $b, !$c"),
- [(set RegPred:$p, (or (setcc RC:$a, RC:$b, ocmp), (not RegPred:$c)))]>;
+ !strconcat("setp.", cmpstr, ".or.", regclsname,
+ "\t$p, $a, $b, !$c"),
+ [(set RegPred:$p, (or (setcc RC:$a, RC:$b, ocmp),
+ (not RegPred:$c)))]>;
def rr_xor_not_r_u
: InstPTX<(outs RegPred:$p), (ins RC:$a, RC:$b, RegPred:$c),
- !strconcat("setp.", cmpstr, "u.xor.", regclsname, "\t$p, $a, $b, !$c"),
- [(set RegPred:$p, (xor (setcc RC:$a, RC:$b, ucmp), (not RegPred:$c)))]>;
+ !strconcat("setp.", cmpstr, "u.xor.", regclsname,
+ "\t$p, $a, $b, !$c"),
+ [(set RegPred:$p, (xor (setcc RC:$a, RC:$b, ucmp),
+ (not RegPred:$c)))]>;
def rr_xor_not_r_o
: InstPTX<(outs RegPred:$p), (ins RC:$a, RC:$b, RegPred:$c),
- !strconcat("setp.", cmpstr, ".xor.", regclsname, "\t$p, $a, $b, !$c"),
- [(set RegPred:$p, (xor (setcc RC:$a, RC:$b, ocmp), (not RegPred:$c)))]>;
+ !strconcat("setp.", cmpstr, ".xor.", regclsname,
+ "\t$p, $a, $b, !$c"),
+ [(set RegPred:$p, (xor (setcc RC:$a, RC:$b, ocmp),
+ (not RegPred:$c)))]>;
}
-multiclass PTX_SELP<RegisterClass RC, string regclsname> {
+//===- Select Predicate Instructions - 4 Operand Form ---------------------===//
+multiclass PTX_SELP<RegisterClass RC, string regclsname, Operand immcls,
+ SDNode immnode> {
def rr
: InstPTX<(outs RC:$r), (ins RegPred:$a, RC:$b, RC:$c),
!strconcat("selp.", regclsname, "\t$r, $b, $c, $a"),
[(set RC:$r, (select RegPred:$a, RC:$b, RC:$c))]>;
+ def ri
+ : InstPTX<(outs RC:$r), (ins RegPred:$a, RC:$b, immcls:$c),
+ !strconcat("selp.", regclsname, "\t$r, $b, $c, $a"),
+ [(set RC:$r, (select RegPred:$a, RC:$b, immnode:$c))]>;
+ def ii
+ : InstPTX<(outs RC:$r), (ins RegPred:$a, immcls:$b, immcls:$c),
+ !strconcat("selp.", regclsname, "\t$r, $b, $c, $a"),
+ [(set RC:$r, (select RegPred:$a, immnode:$b, immnode:$c))]>;
}
-multiclass PTX_LD<string opstr, string typestr, RegisterClass RC, PatFrag pat_load> {
- def rr32 : InstPTX<(outs RC:$d),
- (ins MEMri32:$a),
- !strconcat(opstr, !strconcat(typestr, "\t$d, [$a]")),
- [(set RC:$d, (pat_load ADDRrr32:$a))]>, Requires<[Use32BitAddresses]>;
- def rr64 : InstPTX<(outs RC:$d),
- (ins MEMri64:$a),
- !strconcat(opstr, !strconcat(typestr, "\t$d, [$a]")),
- [(set RC:$d, (pat_load ADDRrr64:$a))]>, Requires<[Use64BitAddresses]>;
- def ri32 : InstPTX<(outs RC:$d),
- (ins MEMri32:$a),
- !strconcat(opstr, !strconcat(typestr, "\t$d, [$a]")),
- [(set RC:$d, (pat_load ADDRri32:$a))]>, Requires<[Use32BitAddresses]>;
- def ri64 : InstPTX<(outs RC:$d),
- (ins MEMri64:$a),
- !strconcat(opstr, !strconcat(typestr, "\t$d, [$a]")),
- [(set RC:$d, (pat_load ADDRri64:$a))]>, Requires<[Use64BitAddresses]>;
- def ii32 : InstPTX<(outs RC:$d),
- (ins MEMii32:$a),
- !strconcat(opstr, !strconcat(typestr, "\t$d, [$a]")),
- [(set RC:$d, (pat_load ADDRii32:$a))]>, Requires<[Use32BitAddresses]>;
- def ii64 : InstPTX<(outs RC:$d),
- (ins MEMii64:$a),
- !strconcat(opstr, !strconcat(typestr, "\t$d, [$a]")),
- [(set RC:$d, (pat_load ADDRii64:$a))]>, Requires<[Use64BitAddresses]>;
-}
-
-multiclass PTX_LD_ALL<string opstr, PatFrag pat_load> {
- defm u16 : PTX_LD<opstr, ".u16", RegI16, pat_load>;
- defm u32 : PTX_LD<opstr, ".u32", RegI32, pat_load>;
- defm u64 : PTX_LD<opstr, ".u64", RegI64, pat_load>;
- defm f32 : PTX_LD<opstr, ".f32", RegF32, pat_load>;
- defm f64 : PTX_LD<opstr, ".f64", RegF64, pat_load>;
-}
-
-multiclass PTX_ST<string opstr, string typestr, RegisterClass RC, PatFrag pat_store> {
- def rr32 : InstPTX<(outs),
- (ins RC:$d, MEMri32:$a),
- !strconcat(opstr, !strconcat(typestr, "\t[$a], $d")),
- [(pat_store RC:$d, ADDRrr32:$a)]>, Requires<[Use32BitAddresses]>;
- def rr64 : InstPTX<(outs),
- (ins RC:$d, MEMri64:$a),
- !strconcat(opstr, !strconcat(typestr, "\t[$a], $d")),
- [(pat_store RC:$d, ADDRrr64:$a)]>, Requires<[Use64BitAddresses]>;
- def ri32 : InstPTX<(outs),
- (ins RC:$d, MEMri32:$a),
- !strconcat(opstr, !strconcat(typestr, "\t[$a], $d")),
- [(pat_store RC:$d, ADDRri32:$a)]>, Requires<[Use32BitAddresses]>;
- def ri64 : InstPTX<(outs),
- (ins RC:$d, MEMri64:$a),
- !strconcat(opstr, !strconcat(typestr, "\t[$a], $d")),
- [(pat_store RC:$d, ADDRri64:$a)]>, Requires<[Use64BitAddresses]>;
- def ii32 : InstPTX<(outs),
- (ins RC:$d, MEMii32:$a),
- !strconcat(opstr, !strconcat(typestr, "\t[$a], $d")),
- [(pat_store RC:$d, ADDRii32:$a)]>, Requires<[Use32BitAddresses]>;
- def ii64 : InstPTX<(outs),
- (ins RC:$d, MEMii64:$a),
- !strconcat(opstr, !strconcat(typestr, "\t[$a], $d")),
- [(pat_store RC:$d, ADDRii64:$a)]>, Requires<[Use64BitAddresses]>;
-}
-multiclass PTX_ST_ALL<string opstr, PatFrag pat_store> {
- defm u16 : PTX_ST<opstr, ".u16", RegI16, pat_store>;
- defm u32 : PTX_ST<opstr, ".u32", RegI32, pat_store>;
- defm u64 : PTX_ST<opstr, ".u64", RegI64, pat_store>;
- defm f32 : PTX_ST<opstr, ".f32", RegF32, pat_store>;
- defm f64 : PTX_ST<opstr, ".f64", RegF64, pat_store>;
-}
//===----------------------------------------------------------------------===//
// Instructions
@@ -585,118 +481,61 @@ multiclass PTX_ST_ALL<string opstr, PatFrag pat_store> {
///===- Integer Arithmetic Instructions -----------------------------------===//
-defm ADD : INT3<"add", add>;
-defm SUB : INT3<"sub", sub>;
-defm MUL : INT3<"mul.lo", mul>; // FIXME: Allow 32x32 -> 64 multiplies
-defm DIV : INT3<"div", udiv>;
-defm REM : INT3<"rem", urem>;
+defm ADD : PTX_INT3<"add", add>;
+defm SUB : PTX_INT3<"sub", sub>;
+defm MUL : PTX_INT3<"mul.lo", mul>; // FIXME: Allow 32x32 -> 64 multiplies
+defm DIV : PTX_INT3<"div", udiv>;
+defm SDIV : PTX_INT3_SIGNED<"div", sdiv>;
+defm REM : PTX_INT3<"rem", urem>;
///===- Floating-Point Arithmetic Instructions ----------------------------===//
-// Standard Unary Operations
-defm FNEG : PTX_FLOAT_2OP<"neg", fneg>;
+// FNEG
+defm FNEG : PTX_FLOAT_2OP<"neg">;
// Standard Binary Operations
-defm FADD : PTX_FLOAT_3OP<"add.rn", fadd>;
-defm FSUB : PTX_FLOAT_3OP<"sub.rn", fsub>;
-defm FMUL : PTX_FLOAT_3OP<"mul.rn", fmul>;
-
-// For floating-point division:
-// SM_13+ defaults to .rn for f32 and f64,
-// SM10 must *not* provide a rounding
-
-// TODO:
-// - Allow user selection of rounding modes for fdiv
-// - Add support for -prec-div=false (.approx)
-
-def FDIVrr32SM13 : InstPTX<(outs RegF32:$d),
- (ins RegF32:$a, RegF32:$b),
- "div.rn.f32\t$d, $a, $b",
- [(set RegF32:$d, (fdiv RegF32:$a, RegF32:$b))]>,
- Requires<[FDivNeedsRoundingMode]>;
-def FDIVri32SM13 : InstPTX<(outs RegF32:$d),
- (ins RegF32:$a, f32imm:$b),
- "div.rn.f32\t$d, $a, $b",
- [(set RegF32:$d, (fdiv RegF32:$a, fpimm:$b))]>,
- Requires<[FDivNeedsRoundingMode]>;
-def FDIVrr32SM10 : InstPTX<(outs RegF32:$d),
- (ins RegF32:$a, RegF32:$b),
- "div.f32\t$d, $a, $b",
- [(set RegF32:$d, (fdiv RegF32:$a, RegF32:$b))]>,
- Requires<[FDivNoRoundingMode]>;
-def FDIVri32SM10 : InstPTX<(outs RegF32:$d),
- (ins RegF32:$a, f32imm:$b),
- "div.f32\t$d, $a, $b",
- [(set RegF32:$d, (fdiv RegF32:$a, fpimm:$b))]>,
- Requires<[FDivNoRoundingMode]>;
-
-def FDIVrr64SM13 : InstPTX<(outs RegF64:$d),
- (ins RegF64:$a, RegF64:$b),
- "div.rn.f64\t$d, $a, $b",
- [(set RegF64:$d, (fdiv RegF64:$a, RegF64:$b))]>,
- Requires<[FDivNeedsRoundingMode]>;
-def FDIVri64SM13 : InstPTX<(outs RegF64:$d),
- (ins RegF64:$a, f64imm:$b),
- "div.rn.f64\t$d, $a, $b",
- [(set RegF64:$d, (fdiv RegF64:$a, fpimm:$b))]>,
- Requires<[FDivNeedsRoundingMode]>;
-def FDIVrr64SM10 : InstPTX<(outs RegF64:$d),
- (ins RegF64:$a, RegF64:$b),
- "div.f64\t$d, $a, $b",
- [(set RegF64:$d, (fdiv RegF64:$a, RegF64:$b))]>,
- Requires<[FDivNoRoundingMode]>;
-def FDIVri64SM10 : InstPTX<(outs RegF64:$d),
- (ins RegF64:$a, f64imm:$b),
- "div.f64\t$d, $a, $b",
- [(set RegF64:$d, (fdiv RegF64:$a, fpimm:$b))]>,
- Requires<[FDivNoRoundingMode]>;
-
-
+defm FADD : PTX_FLOAT_3OP<"add">;
+defm FSUB : PTX_FLOAT_3OP<"sub">;
+defm FMUL : PTX_FLOAT_3OP<"mul">;
+defm FDIV : PTX_FLOAT_3OP<"div">;
// Multi-operation hybrid instructions
+defm FMAD : PTX_FLOAT_4OP<"mad">, Requires<[SupportsFMA]>;
-// The selection of mad/fma is tricky. In some cases, they are the *same*
-// instruction, but in other cases we may prefer one or the other. Also,
-// different PTX versions differ on whether rounding mode flags are required.
-// In the short term, mad is supported on all PTX versions and we use a
-// default rounding mode no matter what shader model or PTX version.
-// TODO: Allow the rounding mode to be selectable through llc.
-defm FMADSM13 : PTX_FLOAT_4OP<"mad.rn", fmul, fadd>,
- Requires<[FMadNeedsRoundingMode, SupportsFMA]>;
-defm FMAD : PTX_FLOAT_4OP<"mad", fmul, fadd>,
- Requires<[FMadNoRoundingMode, SupportsFMA]>;
///===- Floating-Point Intrinsic Instructions -----------------------------===//
-def FSQRT32 : InstPTX<(outs RegF32:$d),
- (ins RegF32:$a),
- "sqrt.rn.f32\t$d, $a",
- [(set RegF32:$d, (fsqrt RegF32:$a))]>;
-
-def FSQRT64 : InstPTX<(outs RegF64:$d),
- (ins RegF64:$a),
- "sqrt.rn.f64\t$d, $a",
- [(set RegF64:$d, (fsqrt RegF64:$a))]>;
-
-def FSIN32 : InstPTX<(outs RegF32:$d),
- (ins RegF32:$a),
- "sin.approx.f32\t$d, $a",
- [(set RegF32:$d, (fsin RegF32:$a))]>;
+// SQRT
+def FSQRTrr32 : InstPTX<(outs RegF32:$d), (ins RndMode:$r, RegF32:$a),
+ "sqrt$r.f32\t$d, $a", []>;
+def FSQRTri32 : InstPTX<(outs RegF32:$d), (ins RndMode:$r, f32imm:$a),
+ "sqrt$r.f32\t$d, $a", []>;
+def FSQRTrr64 : InstPTX<(outs RegF64:$d), (ins RndMode:$r, RegF64:$a),
+ "sqrt$r.f64\t$d, $a", []>;
+def FSQRTri64 : InstPTX<(outs RegF64:$d), (ins RndMode:$r, f64imm:$a),
+ "sqrt$r.f64\t$d, $a", []>;
+
+// SIN
+def FSINrr32 : InstPTX<(outs RegF32:$d), (ins RndMode:$r, RegF32:$a),
+ "sin$r.f32\t$d, $a", []>;
+def FSINri32 : InstPTX<(outs RegF32:$d), (ins RndMode:$r, f32imm:$a),
+ "sin$r.f32\t$d, $a", []>;
+def FSINrr64 : InstPTX<(outs RegF64:$d), (ins RndMode:$r, RegF64:$a),
+ "sin$r.f64\t$d, $a", []>;
+def FSINri64 : InstPTX<(outs RegF64:$d), (ins RndMode:$r, f64imm:$a),
+ "sin$r.f64\t$d, $a", []>;
+
+// COS
+def FCOSrr32 : InstPTX<(outs RegF32:$d), (ins RndMode:$r, RegF32:$a),
+ "cos$r.f32\t$d, $a", []>;
+def FCOSri32 : InstPTX<(outs RegF32:$d), (ins RndMode:$r, f32imm:$a),
+ "cos$r.f32\t$d, $a", []>;
+def FCOSrr64 : InstPTX<(outs RegF64:$d), (ins RndMode:$r, RegF64:$a),
+ "cos$r.f64\t$d, $a", []>;
+def FCOSri64 : InstPTX<(outs RegF64:$d), (ins RndMode:$r, f64imm:$a),
+ "cos$r.f64\t$d, $a", []>;
-def FSIN64 : InstPTX<(outs RegF64:$d),
- (ins RegF64:$a),
- "sin.approx.f64\t$d, $a",
- [(set RegF64:$d, (fsin RegF64:$a))]>;
-def FCOS32 : InstPTX<(outs RegF32:$d),
- (ins RegF32:$a),
- "cos.approx.f32\t$d, $a",
- [(set RegF32:$d, (fcos RegF32:$a))]>;
-
-def FCOS64 : InstPTX<(outs RegF64:$d),
- (ins RegF64:$a),
- "cos.approx.f64\t$d, $a",
- [(set RegF64:$d, (fcos RegF64:$a))]>;
///===- Comparison and Selection Instructions -----------------------------===//
@@ -744,35 +583,35 @@ defm SETPGEs64 : PTX_SETP_I<RegI64, "s64", i64imm, SETGE, "ge">;
// Compare f32
-defm SETPEQf32 : PTX_SETP_FP<RegF32, "f32", SETUEQ, SETOEQ, "eq">;
-defm SETPNEf32 : PTX_SETP_FP<RegF32, "f32", SETUNE, SETONE, "ne">;
-defm SETPLTf32 : PTX_SETP_FP<RegF32, "f32", SETULT, SETOLT, "lt">;
-defm SETPLEf32 : PTX_SETP_FP<RegF32, "f32", SETULE, SETOLE, "le">;
-defm SETPGTf32 : PTX_SETP_FP<RegF32, "f32", SETUGT, SETOGT, "gt">;
-defm SETPGEf32 : PTX_SETP_FP<RegF32, "f32", SETUGE, SETOGE, "ge">;
+defm SETPEQf32 : PTX_SETP_FP<RegF32, "f32", f32imm, SETUEQ, SETOEQ, "eq">;
+defm SETPNEf32 : PTX_SETP_FP<RegF32, "f32", f32imm, SETUNE, SETONE, "ne">;
+defm SETPLTf32 : PTX_SETP_FP<RegF32, "f32", f32imm, SETULT, SETOLT, "lt">;
+defm SETPLEf32 : PTX_SETP_FP<RegF32, "f32", f32imm, SETULE, SETOLE, "le">;
+defm SETPGTf32 : PTX_SETP_FP<RegF32, "f32", f32imm, SETUGT, SETOGT, "gt">;
+defm SETPGEf32 : PTX_SETP_FP<RegF32, "f32", f32imm, SETUGE, SETOGE, "ge">;
// Compare f64
-defm SETPEQf64 : PTX_SETP_FP<RegF64, "f64", SETUEQ, SETOEQ, "eq">;
-defm SETPNEf64 : PTX_SETP_FP<RegF64, "f64", SETUNE, SETONE, "ne">;
-defm SETPLTf64 : PTX_SETP_FP<RegF64, "f64", SETULT, SETOLT, "lt">;
-defm SETPLEf64 : PTX_SETP_FP<RegF64, "f64", SETULE, SETOLE, "le">;
-defm SETPGTf64 : PTX_SETP_FP<RegF64, "f64", SETUGT, SETOGT, "gt">;
-defm SETPGEf64 : PTX_SETP_FP<RegF64, "f64", SETUGE, SETOGE, "ge">;
+defm SETPEQf64 : PTX_SETP_FP<RegF64, "f64", f64imm, SETUEQ, SETOEQ, "eq">;
+defm SETPNEf64 : PTX_SETP_FP<RegF64, "f64", f64imm, SETUNE, SETONE, "ne">;
+defm SETPLTf64 : PTX_SETP_FP<RegF64, "f64", f64imm, SETULT, SETOLT, "lt">;
+defm SETPLEf64 : PTX_SETP_FP<RegF64, "f64", f64imm, SETULE, SETOLE, "le">;
+defm SETPGTf64 : PTX_SETP_FP<RegF64, "f64", f64imm, SETUGT, SETOGT, "gt">;
+defm SETPGEf64 : PTX_SETP_FP<RegF64, "f64", f64imm, SETUGE, SETOGE, "ge">;
// .selp
-defm PTX_SELPu16 : PTX_SELP<RegI16, "u16">;
-defm PTX_SELPu32 : PTX_SELP<RegI32, "u32">;
-defm PTX_SELPu64 : PTX_SELP<RegI64, "u64">;
-defm PTX_SELPf32 : PTX_SELP<RegF32, "f32">;
-defm PTX_SELPf64 : PTX_SELP<RegF64, "f64">;
+defm SELPi16 : PTX_SELP<RegI16, "u16", i16imm, imm>;
+defm SELPi32 : PTX_SELP<RegI32, "u32", i32imm, imm>;
+defm SELPi64 : PTX_SELP<RegI64, "u64", i64imm, imm>;
+defm SELPf32 : PTX_SELP<RegF32, "f32", f32imm, fpimm>;
+defm SELPf64 : PTX_SELP<RegF64, "f64", f64imm, fpimm>;
///===- Logic and Shift Instructions --------------------------------------===//
-defm SHL : INT3ntnc<"shl.b", PTXshl>;
-defm SRL : INT3ntnc<"shr.u", PTXsrl>;
-defm SRA : INT3ntnc<"shr.s", PTXsra>;
+defm SHL : PTX_INT3ntnc<"shl.b", PTXshl>;
+defm SRL : PTX_INT3ntnc<"shr.u", PTXsrl>;
+defm SRA : PTX_INT3ntnc<"shr.s", PTXsra>;
defm AND : PTX_LOGIC<"and", and>;
defm OR : PTX_LOGIC<"or", or>;
@@ -780,6 +619,24 @@ defm XOR : PTX_LOGIC<"xor", xor>;
///===- Data Movement and Conversion Instructions -------------------------===//
+// any_extend
+// Implement the anyext instruction in terms of the PTX cvt instructions.
+//def : Pat<(i32 (anyext RegI16:$a)), (CVT_u32_u16 RegI16:$a)>;
+//def : Pat<(i64 (anyext RegI16:$a)), (CVT_u64_u16 RegI16:$a)>;
+//def : Pat<(i64 (anyext RegI32:$a)), (CVT_u64_u32 RegI32:$a)>;
+
+// bitconvert
+// These instructions implement the bit-wise conversion between integer and
+// floating-point types.
+def MOVi32f32
+ : InstPTX<(outs RegI32:$d), (ins RegF32:$a), "mov.b32\t$d, $a", []>;
+def MOVf32i32
+ : InstPTX<(outs RegF32:$d), (ins RegI32:$a), "mov.b32\t$d, $a", []>;
+def MOVi64f64
+ : InstPTX<(outs RegI64:$d), (ins RegF64:$a), "mov.b64\t$d, $a", []>;
+def MOVf64i64
+ : InstPTX<(outs RegF64:$d), (ins RegI64:$a), "mov.b64\t$d, $a", []>;
+
let neverHasSideEffects = 1 in {
def MOVPREDrr
: InstPTX<(outs RegPred:$d), (ins RegPred:$a), "mov.pred\t$d, $a", []>;
@@ -825,278 +682,332 @@ let isReMaterializable = 1, isAsCheapAsAMove = 1 in {
[(set RegI64:$d, (PTXcopyaddress tglobaladdr:$a))]>;
}
-// Loads
-defm LDg : PTX_LD_ALL<"ld.global", load_global>;
-defm LDc : PTX_LD_ALL<"ld.const", load_constant>;
-defm LDl : PTX_LD_ALL<"ld.local", load_local>;
-defm LDs : PTX_LD_ALL<"ld.shared", load_shared>;
+// PTX cvt instructions
+// Note all of these may actually be used, we just define all possible patterns
+// here (that make sense).
+// FIXME: Can we collapse this somehow into a multiclass def?
+
+// To i16
+def CVTu16u32
+ : InstPTX<(outs RegI16:$d), (ins RegI32:$a), "cvt.u16.u32\t$d, $a", []>;
+def CVTu16u64
+ : InstPTX<(outs RegI16:$d), (ins RegI64:$a), "cvt.u16.u64\t$d, $a", []>;
+def CVTu16f32
+ : InstPTX<(outs RegI16:$d), (ins RndMode:$r, RegF32:$a),
+ "cvt$r.u16.f32\t$d, $a", []>;
+def CVTs16f32
+ : InstPTX<(outs RegI16:$d), (ins RndMode:$r, RegF32:$a),
+ "cvt$r.s16.f32\t$d, $a", []>;
+def CVTu16f64
+ : InstPTX<(outs RegI16:$d), (ins RndMode:$r, RegF64:$a),
+ "cvt$r.u16.f64\t$d, $a", []>;
+def CVTs16f64
+ : InstPTX<(outs RegI16:$d), (ins RndMode:$r, RegF64:$a),
+ "cvt$r.s16.f64\t$d, $a", []>;
+
+// To i32
+def CVTu32u16
+ : InstPTX<(outs RegI32:$d), (ins RegI16:$a), "cvt.u32.u16\t$d, $a", []>;
+def CVTs32s16
+ : InstPTX<(outs RegI32:$d), (ins RegI16:$a), "cvt.s32.s16\t$d, $a", []>;
+def CVTu32u64
+ : InstPTX<(outs RegI32:$d), (ins RegI64:$a), "cvt.u32.u64\t$d, $a", []>;
+def CVTu32f32
+ : InstPTX<(outs RegI32:$d), (ins RndMode:$r, RegF32:$a),
+ "cvt$r.u32.f32\t$d, $a", []>;
+def CVTs32f32
+ : InstPTX<(outs RegI32:$d), (ins RndMode:$r, RegF32:$a),
+ "cvt$r.s32.f32\t$d, $a", []>;
+def CVTu32f64
+ : InstPTX<(outs RegI32:$d), (ins RndMode:$r, RegF64:$a),
+ "cvt$r.u32.f64\t$d, $a", []>;
+def CVTs32f64
+ : InstPTX<(outs RegI32:$d), (ins RndMode:$r, RegF64:$a),
+ "cvt$r.s32.f64\t$d, $a", []>;
+
+// To i64
+def CVTu64u16
+ : InstPTX<(outs RegI64:$d), (ins RegI16:$a), "cvt.u64.u16\t$d, $a", []>;
+def CVTs64s16
+ : InstPTX<(outs RegI64:$d), (ins RegI16:$a), "cvt.s64.s16\t$d, $a", []>;
+def CVTu64u32
+ : InstPTX<(outs RegI64:$d), (ins RegI32:$a), "cvt.u64.u32\t$d, $a", []>;
+def CVTs64s32
+ : InstPTX<(outs RegI64:$d), (ins RegI32:$a), "cvt.s64.s32\t$d, $a", []>;
+def CVTu64f32
+ : InstPTX<(outs RegI64:$d), (ins RndMode:$r, RegF32:$a),
+ "cvt$r.u64.f32\t$d, $a", []>;
+def CVTs64f32
+ : InstPTX<(outs RegI64:$d), (ins RndMode:$r, RegF32:$a),
+ "cvt$r.s64.f32\t$d, $a", []>;
+def CVTu64f64
+ : InstPTX<(outs RegI64:$d), (ins RndMode:$r, RegF64:$a),
+ "cvt$r.u64.f64\t$d, $a", []>;
+def CVTs64f64
+ : InstPTX<(outs RegI64:$d), (ins RndMode:$r, RegF64:$a),
+ "cvt$r.s64.f64\t$d, $a", []>;
+
+// To f32
+def CVTf32u16
+ : InstPTX<(outs RegF32:$d), (ins RndMode:$r, RegI16:$a),
+ "cvt$r.f32.u16\t$d, $a", []>;
+def CVTf32s16
+ : InstPTX<(outs RegF32:$d), (ins RndMode:$r, RegI16:$a),
+ "cvt$r.f32.s16\t$d, $a", []>;
+def CVTf32u32
+ : InstPTX<(outs RegF32:$d), (ins RndMode:$r, RegI32:$a),
+ "cvt$r.f32.u32\t$d, $a", []>;
+def CVTf32s32
+ : InstPTX<(outs RegF32:$d), (ins RndMode:$r, RegI32:$a),
+ "cvt$r.f32.s32\t$d, $a", []>;
+def CVTf32u64
+ : InstPTX<(outs RegF32:$d), (ins RndMode:$r, RegI64:$a),
+ "cvt$r.f32.u64\t$d, $a", []>;
+def CVTf32s64
+ : InstPTX<(outs RegF32:$d), (ins RndMode:$r, RegI64:$a),
+ "cvt$r.f32.s64\t$d, $a", []>;
+def CVTf32f64
+ : InstPTX<(outs RegF32:$d), (ins RndMode:$r, RegF64:$a),
+ "cvt$r.f32.f64\t$d, $a", []>;
+
+// To f64
+def CVTf64u16
+ : InstPTX<(outs RegF64:$d), (ins RndMode:$r, RegI16:$a),
+ "cvt$r.f64.u16\t$d, $a", []>;
+def CVTf64s16
+ : InstPTX<(outs RegF64:$d), (ins RndMode:$r, RegI16:$a),
+ "cvt$r.f64.s16\t$d, $a", []>;
+def CVTf64u32
+ : InstPTX<(outs RegF64:$d), (ins RndMode:$r, RegI32:$a),
+ "cvt$r.f64.u32\t$d, $a", []>;
+def CVTf64s32
+ : InstPTX<(outs RegF64:$d), (ins RndMode:$r, RegI32:$a),
+ "cvt$r.f64.s32\t$d, $a", []>;
+def CVTf64u64
+ : InstPTX<(outs RegF64:$d), (ins RndMode:$r, RegI64:$a),
+ "cvt$r.f64.u64\t$d, $a", []>;
+def CVTf64s64
+ : InstPTX<(outs RegF64:$d), (ins RndMode:$r, RegI64:$a),
+ "cvt$r.f64.s64\t$d, $a", []>;
+def CVTf64f32
+ : InstPTX<(outs RegF64:$d), (ins RegF32:$a), "cvt.f64.f32\t$d, $a", []>;
+
+ ///===- Control Flow Instructions -----------------------------------------===//
-// These instructions are used to load/store from the .param space for
-// device and kernel parameters
+let isBranch = 1, isTerminator = 1, isBarrier = 1 in {
+ def BRAd
+ : InstPTX<(outs), (ins brtarget:$d), "bra\t$d", [(br bb:$d)]>;
+}
-let hasSideEffects = 1 in {
- def LDpiPred : InstPTX<(outs RegPred:$d), (ins MEMpi:$a),
- "ld.param.pred\t$d, [$a]",
- [(set RegPred:$d, (PTXloadparam timm:$a))]>;
- def LDpiU16 : InstPTX<(outs RegI16:$d), (ins MEMpi:$a),
- "ld.param.u16\t$d, [$a]",
- [(set RegI16:$d, (PTXloadparam timm:$a))]>;
- def LDpiU32 : InstPTX<(outs RegI32:$d), (ins MEMpi:$a),
- "ld.param.u32\t$d, [$a]",
- [(set RegI32:$d, (PTXloadparam timm:$a))]>;
- def LDpiU64 : InstPTX<(outs RegI64:$d), (ins MEMpi:$a),
- "ld.param.u64\t$d, [$a]",
- [(set RegI64:$d, (PTXloadparam timm:$a))]>;
- def LDpiF32 : InstPTX<(outs RegF32:$d), (ins MEMpi:$a),
- "ld.param.f32\t$d, [$a]",
- [(set RegF32:$d, (PTXloadparam timm:$a))]>;
- def LDpiF64 : InstPTX<(outs RegF64:$d), (ins MEMpi:$a),
- "ld.param.f64\t$d, [$a]",
- [(set RegF64:$d, (PTXloadparam timm:$a))]>;
-
- def STpiPred : InstPTX<(outs), (ins MEMret:$d, RegPred:$a),
- "st.param.pred\t[$d], $a",
- [(PTXstoreparam timm:$d, RegPred:$a)]>;
- def STpiU16 : InstPTX<(outs), (ins MEMret:$d, RegI16:$a),
- "st.param.u16\t[$d], $a",
- [(PTXstoreparam timm:$d, RegI16:$a)]>;
- def STpiU32 : InstPTX<(outs), (ins MEMret:$d, RegI32:$a),
- "st.param.u32\t[$d], $a",
- [(PTXstoreparam timm:$d, RegI32:$a)]>;
- def STpiU64 : InstPTX<(outs), (ins MEMret:$d, RegI64:$a),
- "st.param.u64\t[$d], $a",
- [(PTXstoreparam timm:$d, RegI64:$a)]>;
- def STpiF32 : InstPTX<(outs), (ins MEMret:$d, RegF32:$a),
- "st.param.f32\t[$d], $a",
- [(PTXstoreparam timm:$d, RegF32:$a)]>;
- def STpiF64 : InstPTX<(outs), (ins MEMret:$d, RegF64:$a),
- "st.param.f64\t[$d], $a",
- [(PTXstoreparam timm:$d, RegF64:$a)]>;
+let isBranch = 1, isTerminator = 1 in {
+ // FIXME: The pattern part is blank because I cannot (or do not yet know
+ // how to) use the first operand of PredicateOperand (a RegPred register) here
+ def BRAdp
+ : InstPTX<(outs), (ins brtarget:$d), "bra\t$d",
+ [/*(brcond pred:$_p, bb:$d)*/]>;
}
-// Stores
-defm STg : PTX_ST_ALL<"st.global", store_global>;
-defm STl : PTX_ST_ALL<"st.local", store_local>;
-defm STs : PTX_ST_ALL<"st.shared", store_shared>;
+let isReturn = 1, isTerminator = 1, isBarrier = 1 in {
+ def EXIT : InstPTX<(outs), (ins), "exit", [(PTXexit)]>;
+ def RET : InstPTX<(outs), (ins), "ret", [(PTXret)]>;
+}
-// defm STp : PTX_ST_ALL<"st.param", store_parameter>;
-// defm LDp : PTX_LD_ALL<"ld.param", load_parameter>;
-// TODO: Do something with st.param if/when it is needed.
+let hasSideEffects = 1 in {
+ def CALL : InstPTX<(outs), (ins), "call", [(PTXcall)]>;
+}
-// Conversion to pred
-// PTX does not directly support converting to a predicate type, so we fake it
-// by performing a greater-than test between the value and zero. This follows
-// the C convention that any non-zero value is equivalent to 'true'.
-def CVT_pred_u16
- : InstPTX<(outs RegPred:$d), (ins RegI16:$a), "setp.gt.u16\t$d, $a, 0",
- [(set RegPred:$d, (trunc RegI16:$a))]>;
+///===- Parameter Passing Pseudo-Instructions -----------------------------===//
+
+def READPARAMPRED : InstPTX<(outs RegPred:$a), (ins i32imm:$b),
+ "mov.pred\t$a, %param$b", []>;
+def READPARAMI16 : InstPTX<(outs RegI16:$a), (ins i32imm:$b),
+ "mov.b16\t$a, %param$b", []>;
+def READPARAMI32 : InstPTX<(outs RegI32:$a), (ins i32imm:$b),
+ "mov.b32\t$a, %param$b", []>;
+def READPARAMI64 : InstPTX<(outs RegI64:$a), (ins i32imm:$b),
+ "mov.b64\t$a, %param$b", []>;
+def READPARAMF32 : InstPTX<(outs RegF32:$a), (ins i32imm:$b),
+ "mov.f32\t$a, %param$b", []>;
+def READPARAMF64 : InstPTX<(outs RegF64:$a), (ins i32imm:$b),
+ "mov.f64\t$a, %param$b", []>;
+
+def WRITEPARAMPRED : InstPTX<(outs), (ins RegPred:$a), "//w", []>;
+def WRITEPARAMI16 : InstPTX<(outs), (ins RegI16:$a), "//w", []>;
+def WRITEPARAMI32 : InstPTX<(outs), (ins RegI32:$a), "//w", []>;
+def WRITEPARAMI64 : InstPTX<(outs), (ins RegI64:$a), "//w", []>;
+def WRITEPARAMF32 : InstPTX<(outs), (ins RegF32:$a), "//w", []>;
+def WRITEPARAMF64 : InstPTX<(outs), (ins RegF64:$a), "//w", []>;
-def CVT_pred_u32
- : InstPTX<(outs RegPred:$d), (ins RegI32:$a), "setp.gt.u32\t$d, $a, 0",
- [(set RegPred:$d, (trunc RegI32:$a))]>;
-def CVT_pred_u64
- : InstPTX<(outs RegPred:$d), (ins RegI64:$a), "setp.gt.u64\t$d, $a, 0",
- [(set RegPred:$d, (trunc RegI64:$a))]>;
+//===----------------------------------------------------------------------===//
+// Instruction Selection Patterns
+//===----------------------------------------------------------------------===//
-def CVT_pred_f32
- : InstPTX<(outs RegPred:$d), (ins RegF32:$a), "setp.gt.f32\t$d, $a, 0",
- [(set RegPred:$d, (fp_to_uint RegF32:$a))]>;
+// FADD
+def : Pat<(f32 (fadd RegF32:$a, RegF32:$b)),
+ (FADDrr32 RndDefault, RegF32:$a, RegF32:$b)>;
+def : Pat<(f32 (fadd RegF32:$a, fpimm:$b)),
+ (FADDri32 RndDefault, RegF32:$a, fpimm:$b)>;
+def : Pat<(f64 (fadd RegF64:$a, RegF64:$b)),
+ (FADDrr64 RndDefault, RegF64:$a, RegF64:$b)>;
+def : Pat<(f64 (fadd RegF64:$a, fpimm:$b)),
+ (FADDri64 RndDefault, RegF64:$a, fpimm:$b)>;
+
+// FSUB
+def : Pat<(f32 (fsub RegF32:$a, RegF32:$b)),
+ (FSUBrr32 RndDefault, RegF32:$a, RegF32:$b)>;
+def : Pat<(f32 (fsub RegF32:$a, fpimm:$b)),
+ (FSUBri32 RndDefault, RegF32:$a, fpimm:$b)>;
+def : Pat<(f64 (fsub RegF64:$a, RegF64:$b)),
+ (FSUBrr64 RndDefault, RegF64:$a, RegF64:$b)>;
+def : Pat<(f64 (fsub RegF64:$a, fpimm:$b)),
+ (FSUBri64 RndDefault, RegF64:$a, fpimm:$b)>;
+
+// FMUL
+def : Pat<(f32 (fmul RegF32:$a, RegF32:$b)),
+ (FMULrr32 RndDefault, RegF32:$a, RegF32:$b)>;
+def : Pat<(f32 (fmul RegF32:$a, fpimm:$b)),
+ (FMULri32 RndDefault, RegF32:$a, fpimm:$b)>;
+def : Pat<(f64 (fmul RegF64:$a, RegF64:$b)),
+ (FMULrr64 RndDefault, RegF64:$a, RegF64:$b)>;
+def : Pat<(f64 (fmul RegF64:$a, fpimm:$b)),
+ (FMULri64 RndDefault, RegF64:$a, fpimm:$b)>;
+
+// FDIV
+def : Pat<(f32 (fdiv RegF32:$a, RegF32:$b)),
+ (FDIVrr32 RndDefault, RegF32:$a, RegF32:$b)>;
+def : Pat<(f32 (fdiv RegF32:$a, fpimm:$b)),
+ (FDIVri32 RndDefault, RegF32:$a, fpimm:$b)>;
+def : Pat<(f64 (fdiv RegF64:$a, RegF64:$b)),
+ (FDIVrr64 RndDefault, RegF64:$a, RegF64:$b)>;
+def : Pat<(f64 (fdiv RegF64:$a, fpimm:$b)),
+ (FDIVri64 RndDefault, RegF64:$a, fpimm:$b)>;
+
+// FMUL+FADD
+def : Pat<(f32 (fadd (fmul RegF32:$a, RegF32:$b), RegF32:$c)),
+ (FMADrrr32 RndDefault, RegF32:$a, RegF32:$b, RegF32:$c)>;
+def : Pat<(f32 (fadd (fmul RegF32:$a, RegF32:$b), fpimm:$c)),
+ (FMADrri32 RndDefault, RegF32:$a, RegF32:$b, fpimm:$c)>;
+def : Pat<(f32 (fadd (fmul RegF32:$a, fpimm:$b), fpimm:$c)),
+ (FMADrrr32 RndDefault, RegF32:$a, fpimm:$b, fpimm:$c)>;
+def : Pat<(f32 (fadd (fmul RegF32:$a, RegF32:$b), fpimm:$c)),
+ (FMADrri32 RndDefault, RegF32:$a, RegF32:$b, fpimm:$c)>;
+def : Pat<(f64 (fadd (fmul RegF64:$a, RegF64:$b), RegF64:$c)),
+ (FMADrrr64 RndDefault, RegF64:$a, RegF64:$b, RegF64:$c)>;
+def : Pat<(f64 (fadd (fmul RegF64:$a, RegF64:$b), fpimm:$c)),
+ (FMADrri64 RndDefault, RegF64:$a, RegF64:$b, fpimm:$c)>;
+def : Pat<(f64 (fadd (fmul RegF64:$a, fpimm:$b), fpimm:$c)),
+ (FMADrri64 RndDefault, RegF64:$a, fpimm:$b, fpimm:$c)>;
+
+// FNEG
+def : Pat<(f32 (fneg RegF32:$a)), (FNEGrr32 RndDefault, RegF32:$a)>;
+def : Pat<(f32 (fneg fpimm:$a)), (FNEGri32 RndDefault, fpimm:$a)>;
+def : Pat<(f64 (fneg RegF64:$a)), (FNEGrr64 RndDefault, RegF64:$a)>;
+def : Pat<(f64 (fneg fpimm:$a)), (FNEGri64 RndDefault, fpimm:$a)>;
+
+// FSQRT
+def : Pat<(f32 (fsqrt RegF32:$a)), (FSQRTrr32 RndDefault, RegF32:$a)>;
+def : Pat<(f32 (fsqrt fpimm:$a)), (FSQRTri32 RndDefault, fpimm:$a)>;
+def : Pat<(f64 (fsqrt RegF64:$a)), (FSQRTrr64 RndDefault, RegF64:$a)>;
+def : Pat<(f64 (fsqrt fpimm:$a)), (FSQRTri64 RndDefault, fpimm:$a)>;
+
+// FSIN
+def : Pat<(f32 (fsin RegF32:$a)), (FSINrr32 RndDefault, RegF32:$a)>;
+def : Pat<(f32 (fsin fpimm:$a)), (FSINri32 RndDefault, fpimm:$a)>;
+def : Pat<(f64 (fsin RegF64:$a)), (FSINrr64 RndDefault, RegF64:$a)>;
+def : Pat<(f64 (fsin fpimm:$a)), (FSINri64 RndDefault, fpimm:$a)>;
+
+// FCOS
+def : Pat<(f32 (fcos RegF32:$a)), (FCOSrr32 RndDefault, RegF32:$a)>;
+def : Pat<(f32 (fcos fpimm:$a)), (FCOSri32 RndDefault, fpimm:$a)>;
+def : Pat<(f64 (fcos RegF64:$a)), (FCOSrr64 RndDefault, RegF64:$a)>;
+def : Pat<(f64 (fcos fpimm:$a)), (FCOSri64 RndDefault, fpimm:$a)>;
+
+// Type conversion notes:
+// - PTX does not directly support converting a predicate to a value, so we
+// use a select instruction to select either 0 or 1 (integer or fp) based
+// on the truth value of the predicate.
+// - PTX does not directly support converting to a predicate type, so we fake it
+// by performing a greater-than test between the value and zero. This follows
+// the C convention that any non-zero value is equivalent to 'true'.
-def CVT_pred_f64
- : InstPTX<(outs RegPred:$d), (ins RegF64:$a), "setp.gt.f64\t$d, $a, 0",
- [(set RegPred:$d, (fp_to_uint RegF64:$a))]>;
+// Conversion to pred
+def : Pat<(i1 (trunc RegI16:$a)), (SETPGTu16ri RegI16:$a, 0)>;
+def : Pat<(i1 (trunc RegI32:$a)), (SETPGTu32ri RegI32:$a, 0)>;
+def : Pat<(i1 (trunc RegI64:$a)), (SETPGTu64ri RegI64:$a, 0)>;
+def : Pat<(i1 (fp_to_uint RegF32:$a)), (SETPGTu32ri (MOVi32f32 RegF32:$a), 0)>;
+def : Pat<(i1 (fp_to_uint RegF64:$a)), (SETPGTu64ri (MOVi64f64 RegF64:$a), 0)>;
// Conversion to u16
-// PTX does not directly support converting a predicate to a value, so we
-// use a select instruction to select either 0 or 1 (integer or fp) based
-// on the truth value of the predicate.
-def CVT_u16_preda
- : InstPTX<(outs RegI16:$d), (ins RegPred:$a), "selp.u16\t$d, 1, 0, $a",
- [(set RegI16:$d, (anyext RegPred:$a))]>;
-
-def CVT_u16_pred
- : InstPTX<(outs RegI16:$d), (ins RegPred:$a), "selp.u16\t$d, 1, 0, $a",
- [(set RegI16:$d, (zext RegPred:$a))]>;
-
-def CVT_u16_preds
- : InstPTX<(outs RegI16:$d), (ins RegPred:$a), "selp.u16\t$d, 1, 0, $a",
- [(set RegI16:$d, (sext RegPred:$a))]>;
-
-def CVT_u16_u32
- : InstPTX<(outs RegI16:$d), (ins RegI32:$a), "cvt.u16.u32\t$d, $a",
- [(set RegI16:$d, (trunc RegI32:$a))]>;
-
-def CVT_u16_u64
- : InstPTX<(outs RegI16:$d), (ins RegI64:$a), "cvt.u16.u64\t$d, $a",
- [(set RegI16:$d, (trunc RegI64:$a))]>;
-
-def CVT_u16_f32
- : InstPTX<(outs RegI16:$d), (ins RegF32:$a), "cvt.rzi.u16.f32\t$d, $a",
- [(set RegI16:$d, (fp_to_uint RegF32:$a))]>;
-
-def CVT_u16_f64
- : InstPTX<(outs RegI16:$d), (ins RegF64:$a), "cvt.rzi.u16.f64\t$d, $a",
- [(set RegI16:$d, (fp_to_uint RegF64:$a))]>;
+def : Pat<(i16 (anyext RegPred:$a)), (SELPi16ii RegPred:$a, 1, 0)>;
+def : Pat<(i16 (sext RegPred:$a)), (SELPi16ii RegPred:$a, 0xFFFF, 0)>;
+def : Pat<(i16 (zext RegPred:$a)), (SELPi16ii RegPred:$a, 1, 0)>;
+def : Pat<(i16 (trunc RegI32:$a)), (CVTu16u32 RegI32:$a)>;
+def : Pat<(i16 (trunc RegI64:$a)), (CVTu16u64 RegI64:$a)>;
+def : Pat<(i16 (fp_to_uint RegF32:$a)), (CVTu16f32 RndDefault, RegF32:$a)>;
+def : Pat<(i16 (fp_to_sint RegF32:$a)), (CVTs16f32 RndDefault, RegF32:$a)>;
+def : Pat<(i16 (fp_to_uint RegF64:$a)), (CVTu16f64 RndDefault, RegF64:$a)>;
+def : Pat<(i16 (fp_to_sint RegF64:$a)), (CVTs16f64 RndDefault, RegF64:$a)>;
// Conversion to u32
-
-def CVT_u32_pred
- : InstPTX<(outs RegI32:$d), (ins RegPred:$a), "selp.u32\t$d, 1, 0, $a",
- [(set RegI32:$d, (zext RegPred:$a))]>;
-
-def CVT_u32_b16
- : InstPTX<(outs RegI32:$d), (ins RegI16:$a), "cvt.u32.u16\t$d, $a",
- [(set RegI32:$d, (anyext RegI16:$a))]>;
-
-def CVT_u32_u16
- : InstPTX<(outs RegI32:$d), (ins RegI16:$a), "cvt.u32.u16\t$d, $a",
- [(set RegI32:$d, (zext RegI16:$a))]>;
-
-def CVT_u32_preds
- : InstPTX<(outs RegI32:$d), (ins RegPred:$a), "selp.u32\t$d, 1, 0, $a",
- [(set RegI32:$d, (sext RegPred:$a))]>;
-
-def CVT_u32_s16
- : InstPTX<(outs RegI32:$d), (ins RegI16:$a), "cvt.u32.s16\t$d, $a",
- [(set RegI32:$d, (sext RegI16:$a))]>;
-
-def CVT_u32_u64
- : InstPTX<(outs RegI32:$d), (ins RegI64:$a), "cvt.u32.u64\t$d, $a",
- [(set RegI32:$d, (trunc RegI64:$a))]>;
-
-def CVT_u32_f32
- : InstPTX<(outs RegI32:$d), (ins RegF32:$a), "cvt.rzi.u32.f32\t$d, $a",
- [(set RegI32:$d, (fp_to_uint RegF32:$a))]>;
-
-def CVT_u32_f64
- : InstPTX<(outs RegI32:$d), (ins RegF64:$a), "cvt.rzi.u32.f64\t$d, $a",
- [(set RegI32:$d, (fp_to_uint RegF64:$a))]>;
+def : Pat<(i32 (anyext RegPred:$a)), (SELPi32ii RegPred:$a, 1, 0)>;
+def : Pat<(i32 (sext RegPred:$a)), (SELPi32ii RegPred:$a, 0xFFFFFFFF, 0)>;
+def : Pat<(i32 (zext RegPred:$a)), (SELPi32ii RegPred:$a, 1, 0)>;
+def : Pat<(i32 (anyext RegI16:$a)), (CVTu32u16 RegI16:$a)>;
+def : Pat<(i32 (sext RegI16:$a)), (CVTs32s16 RegI16:$a)>;
+def : Pat<(i32 (zext RegI16:$a)), (CVTu32u16 RegI16:$a)>;
+def : Pat<(i32 (trunc RegI64:$a)), (CVTu32u64 RegI64:$a)>;
+def : Pat<(i32 (fp_to_uint RegF32:$a)), (CVTu32f32 RndDefault, RegF32:$a)>;
+def : Pat<(i32 (fp_to_sint RegF32:$a)), (CVTs32f32 RndDefault, RegF32:$a)>;
+def : Pat<(i32 (fp_to_uint RegF64:$a)), (CVTu32f64 RndDefault, RegF64:$a)>;
+def : Pat<(i32 (fp_to_sint RegF64:$a)), (CVTs32f64 RndDefault, RegF64:$a)>;
+def : Pat<(i32 (bitconvert RegF32:$a)), (MOVi32f32 RegF32:$a)>;
// Conversion to u64
-
-def CVT_u64_pred
- : InstPTX<(outs RegI64:$d), (ins RegPred:$a), "selp.u64\t$d, 1, 0, $a",
- [(set RegI64:$d, (zext RegPred:$a))]>;
-
-def CVT_u64_preds
- : InstPTX<(outs RegI64:$d), (ins RegPred:$a), "selp.u64\t$d, 1, 0, $a",
- [(set RegI64:$d, (sext RegPred:$a))]>;
-
-def CVT_u64_u16
- : InstPTX<(outs RegI64:$d), (ins RegI16:$a), "cvt.u64.u16\t$d, $a",
- [(set RegI64:$d, (zext RegI16:$a))]>;
-
-def CVT_u64_s16
- : InstPTX<(outs RegI64:$d), (ins RegI16:$a), "cvt.u64.s16\t$d, $a",
- [(set RegI64:$d, (sext RegI16:$a))]>;
-
-def CVT_u64_u32
- : InstPTX<(outs RegI64:$d), (ins RegI32:$a), "cvt.u64.u32\t$d, $a",
- [(set RegI64:$d, (zext RegI32:$a))]>;
-
-def CVT_u64_s32
- : InstPTX<(outs RegI64:$d), (ins RegI32:$a), "cvt.u64.s32\t$d, $a",
- [(set RegI64:$d, (sext RegI32:$a))]>;
-
-def CVT_u64_f32
- : InstPTX<(outs RegI64:$d), (ins RegF32:$a), "cvt.rzi.u64.f32\t$d, $a",
- [(set RegI64:$d, (fp_to_uint RegF32:$a))]>;
-
-def CVT_u64_f64
- : InstPTX<(outs RegI64:$d), (ins RegF64:$a), "cvt.rzi.u64.f64\t$d, $a",
- [(set RegI64:$d, (fp_to_uint RegF64:$a))]>;
+def : Pat<(i64 (anyext RegPred:$a)), (SELPi64ii RegPred:$a, 1, 0)>;
+def : Pat<(i64 (sext RegPred:$a)), (SELPi64ii RegPred:$a,
+ 0xFFFFFFFFFFFFFFFF, 0)>;
+def : Pat<(i64 (zext RegPred:$a)), (SELPi64ii RegPred:$a, 1, 0)>;
+def : Pat<(i64 (anyext RegI16:$a)), (CVTu64u16 RegI16:$a)>;
+def : Pat<(i64 (sext RegI16:$a)), (CVTs64s16 RegI16:$a)>;
+def : Pat<(i64 (zext RegI16:$a)), (CVTu64u16 RegI16:$a)>;
+def : Pat<(i64 (anyext RegI32:$a)), (CVTu64u32 RegI32:$a)>;
+def : Pat<(i64 (sext RegI32:$a)), (CVTs64s32 RegI32:$a)>;
+def : Pat<(i64 (zext RegI32:$a)), (CVTu64u32 RegI32:$a)>;
+def : Pat<(i64 (fp_to_uint RegF32:$a)), (CVTu64f32 RndDefault, RegF32:$a)>;
+def : Pat<(i64 (fp_to_sint RegF32:$a)), (CVTs64f32 RndDefault, RegF32:$a)>;
+def : Pat<(i64 (fp_to_uint RegF64:$a)), (CVTu64f64 RndDefault, RegF64:$a)>;
+def : Pat<(i64 (fp_to_sint RegF64:$a)), (CVTs64f64 RndDefault, RegF64:$a)>;
+def : Pat<(i64 (bitconvert RegF64:$a)), (MOVi64f64 RegF64:$a)>;
// Conversion to f32
-
-def CVT_f32_pred
- : InstPTX<(outs RegF32:$d), (ins RegPred:$a),
- "selp.f32\t$d, 0F3F800000, 0F00000000, $a", // 1.0
- [(set RegF32:$d, (uint_to_fp RegPred:$a))]>;
-
-def CVT_f32_u16
- : InstPTX<(outs RegF32:$d), (ins RegI16:$a), "cvt.rn.f32.u16\t$d, $a",
- [(set RegF32:$d, (uint_to_fp RegI16:$a))]>;
-
-def CVT_f32_u32
- : InstPTX<(outs RegF32:$d), (ins RegI32:$a), "cvt.rn.f32.u32\t$d, $a",
- [(set RegF32:$d, (uint_to_fp RegI32:$a))]>;
-
-def CVT_f32_u64
- : InstPTX<(outs RegF32:$d), (ins RegI64:$a), "cvt.rn.f32.u64\t$d, $a",
- [(set RegF32:$d, (uint_to_fp RegI64:$a))]>;
-
-def CVT_f32_f64
- : InstPTX<(outs RegF32:$d), (ins RegF64:$a), "cvt.rn.f32.f64\t$d, $a",
- [(set RegF32:$d, (fround RegF64:$a))]>;
+def : Pat<(f32 (uint_to_fp RegPred:$a)), (SELPf32rr RegPred:$a,
+ (MOVf32i32 0x3F800000), (MOVf32i32 0))>;
+def : Pat<(f32 (uint_to_fp RegI16:$a)), (CVTf32u16 RndDefault, RegI16:$a)>;
+def : Pat<(f32 (sint_to_fp RegI16:$a)), (CVTf32s16 RndDefault, RegI16:$a)>;
+def : Pat<(f32 (uint_to_fp RegI32:$a)), (CVTf32u32 RndDefault, RegI32:$a)>;
+def : Pat<(f32 (sint_to_fp RegI32:$a)), (CVTf32s32 RndDefault, RegI32:$a)>;
+def : Pat<(f32 (uint_to_fp RegI64:$a)), (CVTf32u64 RndDefault, RegI64:$a)>;
+def : Pat<(f32 (sint_to_fp RegI64:$a)), (CVTf32s64 RndDefault, RegI64:$a)>;
+def : Pat<(f32 (fround RegF64:$a)), (CVTf32f64 RndDefault, RegF64:$a)>;
+def : Pat<(f32 (bitconvert RegI32:$a)), (MOVf32i32 RegI32:$a)>;
// Conversion to f64
+def : Pat<(f64 (uint_to_fp RegPred:$a)), (SELPf64rr RegPred:$a,
+ (MOVf64i64 0x3F80000000000000), (MOVf64i64 0))>;
+def : Pat<(f64 (uint_to_fp RegI16:$a)), (CVTf64u16 RndDefault, RegI16:$a)>;
+def : Pat<(f64 (sint_to_fp RegI16:$a)), (CVTf64s16 RndDefault, RegI16:$a)>;
+def : Pat<(f64 (uint_to_fp RegI32:$a)), (CVTf64u32 RndDefault, RegI32:$a)>;
+def : Pat<(f64 (sint_to_fp RegI32:$a)), (CVTf64s32 RndDefault, RegI32:$a)>;
+def : Pat<(f64 (uint_to_fp RegI64:$a)), (CVTf64u64 RndDefault, RegI64:$a)>;
+def : Pat<(f64 (sint_to_fp RegI64:$a)), (CVTf64s64 RndDefault, RegI64:$a)>;
+def : Pat<(f64 (fextend RegF32:$a)), (CVTf64f32 RegF32:$a)>;
+def : Pat<(f64 (bitconvert RegI64:$a)), (MOVf64i64 RegI64:$a)>;
-def CVT_f64_pred
- : InstPTX<(outs RegF64:$d), (ins RegPred:$a),
- "selp.f64\t$d, 0D3F80000000000000, 0D0000000000000000, $a", // 1.0
- [(set RegF64:$d, (uint_to_fp RegPred:$a))]>;
-
-def CVT_f64_u16
- : InstPTX<(outs RegF64:$d), (ins RegI16:$a), "cvt.rn.f64.u16\t$d, $a",
- [(set RegF64:$d, (uint_to_fp RegI16:$a))]>;
-
-def CVT_f64_u32
- : InstPTX<(outs RegF64:$d), (ins RegI32:$a), "cvt.rn.f64.u32\t$d, $a",
- [(set RegF64:$d, (uint_to_fp RegI32:$a))]>;
-
-def CVT_f64_u64
- : InstPTX<(outs RegF64:$d), (ins RegI64:$a), "cvt.rn.f64.u64\t$d, $a",
- [(set RegF64:$d, (uint_to_fp RegI64:$a))]>;
-
-def CVT_f64_f32
- : InstPTX<(outs RegF64:$d), (ins RegF32:$a), "cvt.f64.f32\t$d, $a",
- [(set RegF64:$d, (fextend RegF32:$a))]>;
-
-///===- Control Flow Instructions -----------------------------------------===//
-
-let isBranch = 1, isTerminator = 1, isBarrier = 1 in {
- def BRAd
- : InstPTX<(outs), (ins brtarget:$d), "bra\t$d", [(br bb:$d)]>;
-}
-
-let isBranch = 1, isTerminator = 1 in {
- // FIXME: The pattern part is blank because I cannot (or do not yet know
- // how to) use the first operand of PredicateOperand (a RegPred register) here
- def BRAdp
- : InstPTX<(outs), (ins brtarget:$d), "bra\t$d",
- [/*(brcond pred:$_p, bb:$d)*/]>;
-}
-
-let isReturn = 1, isTerminator = 1, isBarrier = 1 in {
- def EXIT : InstPTX<(outs), (ins), "exit", [(PTXexit)]>;
- def RET : InstPTX<(outs), (ins), "ret", [(PTXret)]>;
-}
-
-///===- Spill Instructions ------------------------------------------------===//
-// Special instructions used for stack spilling
-def STACKSTOREI16 : InstPTX<(outs), (ins i32imm:$d, RegI16:$a),
- "mov.u16\ts$d, $a", []>;
-def STACKSTOREI32 : InstPTX<(outs), (ins i32imm:$d, RegI32:$a),
- "mov.u32\ts$d, $a", []>;
-def STACKSTOREI64 : InstPTX<(outs), (ins i32imm:$d, RegI64:$a),
- "mov.u64\ts$d, $a", []>;
-def STACKSTOREF32 : InstPTX<(outs), (ins i32imm:$d, RegF32:$a),
- "mov.f32\ts$d, $a", []>;
-def STACKSTOREF64 : InstPTX<(outs), (ins i32imm:$d, RegF64:$a),
- "mov.f64\ts$d, $a", []>;
-
-def STACKLOADI16 : InstPTX<(outs), (ins RegI16:$d, i32imm:$a),
- "mov.u16\t$d, s$a", []>;
-def STACKLOADI32 : InstPTX<(outs), (ins RegI32:$d, i32imm:$a),
- "mov.u32\t$d, s$a", []>;
-def STACKLOADI64 : InstPTX<(outs), (ins RegI64:$d, i32imm:$a),
- "mov.u64\t$d, s$a", []>;
-def STACKLOADF32 : InstPTX<(outs), (ins RegF32:$d, i32imm:$a),
- "mov.f32\t$d, s$a", []>;
-def STACKLOADF64 : InstPTX<(outs), (ins RegF64:$d, i32imm:$a),
- "mov.f64\t$d, s$a", []>;
///===- Intrinsic Instructions --------------------------------------------===//
-
include "PTXIntrinsicInstrInfo.td"
+
+///===- Load/Store Instructions -------------------------------------------===//
+include "PTXInstrLoadStore.td"
+
diff --git a/contrib/llvm/lib/Target/PTX/PTXInstrLoadStore.td b/contrib/llvm/lib/Target/PTX/PTXInstrLoadStore.td
new file mode 100644
index 0000000..9b4f56c
--- /dev/null
+++ b/contrib/llvm/lib/Target/PTX/PTXInstrLoadStore.td
@@ -0,0 +1,278 @@
+//===- PTXInstrLoadStore.td - PTX Load/Store Instruction Defs -*- tblgen-*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file describes the PTX load/store instructions in TableGen format.
+//
+//===----------------------------------------------------------------------===//
+
+
+// Addressing Predicates
+// We have to differentiate between 32- and 64-bit pointer types
+def Use32BitAddresses : Predicate<"!getSubtarget().is64Bit()">;
+def Use64BitAddresses : Predicate<"getSubtarget().is64Bit()">;
+
+//===----------------------------------------------------------------------===//
+// Pattern Fragments for Loads/Stores
+//===----------------------------------------------------------------------===//
+
+def load_global : PatFrag<(ops node:$ptr), (load node:$ptr), [{
+ const Value *Src;
+ const PointerType *PT;
+ if ((Src = cast<LoadSDNode>(N)->getSrcValue()) &&
+ (PT = dyn_cast<PointerType>(Src->getType())))
+ return PT->getAddressSpace() == PTXStateSpace::Global;
+ return false;
+}]>;
+
+def load_constant : PatFrag<(ops node:$ptr), (load node:$ptr), [{
+ const Value *Src;
+ const PointerType *PT;
+ if ((Src = cast<LoadSDNode>(N)->getSrcValue()) &&
+ (PT = dyn_cast<PointerType>(Src->getType())))
+ return PT->getAddressSpace() == PTXStateSpace::Constant;
+ return false;
+}]>;
+
+def load_shared : PatFrag<(ops node:$ptr), (load node:$ptr), [{
+ const Value *Src;
+ const PointerType *PT;
+ if ((Src = cast<LoadSDNode>(N)->getSrcValue()) &&
+ (PT = dyn_cast<PointerType>(Src->getType())))
+ return PT->getAddressSpace() == PTXStateSpace::Shared;
+ return false;
+}]>;
+
+def store_global
+ : PatFrag<(ops node:$d, node:$ptr), (store node:$d, node:$ptr), [{
+ const Value *Src;
+ const PointerType *PT;
+ if ((Src = cast<StoreSDNode>(N)->getSrcValue()) &&
+ (PT = dyn_cast<PointerType>(Src->getType())))
+ return PT->getAddressSpace() == PTXStateSpace::Global;
+ return false;
+}]>;
+
+def store_shared
+ : PatFrag<(ops node:$d, node:$ptr), (store node:$d, node:$ptr), [{
+ const Value *Src;
+ const PointerType *PT;
+ if ((Src = cast<StoreSDNode>(N)->getSrcValue()) &&
+ (PT = dyn_cast<PointerType>(Src->getType())))
+ return PT->getAddressSpace() == PTXStateSpace::Shared;
+ return false;
+}]>;
+
+// Addressing modes.
+def ADDRrr32 : ComplexPattern<i32, 2, "SelectADDRrr", [], []>;
+def ADDRrr64 : ComplexPattern<i64, 2, "SelectADDRrr", [], []>;
+def ADDRri32 : ComplexPattern<i32, 2, "SelectADDRri", [], []>;
+def ADDRri64 : ComplexPattern<i64, 2, "SelectADDRri", [], []>;
+def ADDRii32 : ComplexPattern<i32, 2, "SelectADDRii", [], []>;
+def ADDRii64 : ComplexPattern<i64, 2, "SelectADDRii", [], []>;
+def ADDRlocal32 : ComplexPattern<i32, 2, "SelectADDRlocal", [], []>;
+def ADDRlocal64 : ComplexPattern<i64, 2, "SelectADDRlocal", [], []>;
+
+// Address operands
+def MEMri32 : Operand<i32> {
+ let PrintMethod = "printMemOperand";
+ let MIOperandInfo = (ops RegI32, i32imm);
+}
+def MEMri64 : Operand<i64> {
+ let PrintMethod = "printMemOperand";
+ let MIOperandInfo = (ops RegI64, i64imm);
+}
+def LOCALri32 : Operand<i32> {
+ let PrintMethod = "printMemOperand";
+ let MIOperandInfo = (ops i32imm, i32imm);
+}
+def LOCALri64 : Operand<i64> {
+ let PrintMethod = "printMemOperand";
+ let MIOperandInfo = (ops i64imm, i64imm);
+}
+def MEMii32 : Operand<i32> {
+ let PrintMethod = "printMemOperand";
+ let MIOperandInfo = (ops i32imm, i32imm);
+}
+def MEMii64 : Operand<i64> {
+ let PrintMethod = "printMemOperand";
+ let MIOperandInfo = (ops i64imm, i64imm);
+}
+// The operand here does not correspond to an actual address, so we
+// can use i32 in 64-bit address modes.
+def MEMpi : Operand<i32> {
+ let PrintMethod = "printParamOperand";
+ let MIOperandInfo = (ops i32imm);
+}
+def MEMret : Operand<i32> {
+ let PrintMethod = "printReturnOperand";
+ let MIOperandInfo = (ops i32imm);
+}
+
+
+// Load/store .param space
+def PTXloadparam
+ : SDNode<"PTXISD::LOAD_PARAM", SDTypeProfile<1, 1, [SDTCisPtrTy<1>]>,
+ [SDNPHasChain, SDNPOutGlue, SDNPOptInGlue]>;
+def PTXstoreparam
+ : SDNode<"PTXISD::STORE_PARAM", SDTypeProfile<0, 2, [SDTCisVT<0, i32>]>,
+ [SDNPHasChain, SDNPOutGlue, SDNPOptInGlue]>;
+
+def PTXreadparam
+ : SDNode<"PTXISD::READ_PARAM", SDTypeProfile<1, 1, [SDTCisVT<1, i32>]>,
+ [SDNPHasChain, SDNPOutGlue, SDNPOptInGlue]>;
+def PTXwriteparam
+ : SDNode<"PTXISD::WRITE_PARAM", SDTypeProfile<0, 1, []>,
+ [SDNPHasChain, SDNPOutGlue, SDNPOptInGlue]>;
+
+
+
+//===----------------------------------------------------------------------===//
+// Classes for loads/stores
+//===----------------------------------------------------------------------===//
+multiclass PTX_LD<string opstr, string typestr,
+ RegisterClass RC, PatFrag pat_load> {
+ def rr32 : InstPTX<(outs RC:$d),
+ (ins MEMri32:$a),
+ !strconcat(opstr, !strconcat(typestr, "\t$d, [$a]")),
+ [(set RC:$d, (pat_load ADDRrr32:$a))]>,
+ Requires<[Use32BitAddresses]>;
+ def rr64 : InstPTX<(outs RC:$d),
+ (ins MEMri64:$a),
+ !strconcat(opstr, !strconcat(typestr, "\t$d, [$a]")),
+ [(set RC:$d, (pat_load ADDRrr64:$a))]>,
+ Requires<[Use64BitAddresses]>;
+ def ri32 : InstPTX<(outs RC:$d),
+ (ins MEMri32:$a),
+ !strconcat(opstr, !strconcat(typestr, "\t$d, [$a]")),
+ [(set RC:$d, (pat_load ADDRri32:$a))]>,
+ Requires<[Use32BitAddresses]>;
+ def ri64 : InstPTX<(outs RC:$d),
+ (ins MEMri64:$a),
+ !strconcat(opstr, !strconcat(typestr, "\t$d, [$a]")),
+ [(set RC:$d, (pat_load ADDRri64:$a))]>,
+ Requires<[Use64BitAddresses]>;
+ def ii32 : InstPTX<(outs RC:$d),
+ (ins MEMii32:$a),
+ !strconcat(opstr, !strconcat(typestr, "\t$d, [$a]")),
+ [(set RC:$d, (pat_load ADDRii32:$a))]>,
+ Requires<[Use32BitAddresses]>;
+ def ii64 : InstPTX<(outs RC:$d),
+ (ins MEMii64:$a),
+ !strconcat(opstr, !strconcat(typestr, "\t$d, [$a]")),
+ [(set RC:$d, (pat_load ADDRii64:$a))]>,
+ Requires<[Use64BitAddresses]>;
+}
+
+multiclass PTX_ST<string opstr, string typestr, RegisterClass RC,
+ PatFrag pat_store> {
+ def rr32 : InstPTX<(outs),
+ (ins RC:$d, MEMri32:$a),
+ !strconcat(opstr, !strconcat(typestr, "\t[$a], $d")),
+ [(pat_store RC:$d, ADDRrr32:$a)]>,
+ Requires<[Use32BitAddresses]>;
+ def rr64 : InstPTX<(outs),
+ (ins RC:$d, MEMri64:$a),
+ !strconcat(opstr, !strconcat(typestr, "\t[$a], $d")),
+ [(pat_store RC:$d, ADDRrr64:$a)]>,
+ Requires<[Use64BitAddresses]>;
+ def ri32 : InstPTX<(outs),
+ (ins RC:$d, MEMri32:$a),
+ !strconcat(opstr, !strconcat(typestr, "\t[$a], $d")),
+ [(pat_store RC:$d, ADDRri32:$a)]>,
+ Requires<[Use32BitAddresses]>;
+ def ri64 : InstPTX<(outs),
+ (ins RC:$d, MEMri64:$a),
+ !strconcat(opstr, !strconcat(typestr, "\t[$a], $d")),
+ [(pat_store RC:$d, ADDRri64:$a)]>,
+ Requires<[Use64BitAddresses]>;
+ def ii32 : InstPTX<(outs),
+ (ins RC:$d, MEMii32:$a),
+ !strconcat(opstr, !strconcat(typestr, "\t[$a], $d")),
+ [(pat_store RC:$d, ADDRii32:$a)]>,
+ Requires<[Use32BitAddresses]>;
+ def ii64 : InstPTX<(outs),
+ (ins RC:$d, MEMii64:$a),
+ !strconcat(opstr, !strconcat(typestr, "\t[$a], $d")),
+ [(pat_store RC:$d, ADDRii64:$a)]>,
+ Requires<[Use64BitAddresses]>;
+}
+
+multiclass PTX_LOCAL_LD_ST<string typestr, RegisterClass RC> {
+ def LDri32 : InstPTX<(outs RC:$d), (ins LOCALri32:$a),
+ !strconcat("ld.local", !strconcat(typestr, "\t$d, [$a]")),
+ [(set RC:$d, (load_global ADDRlocal32:$a))]>;
+ def LDri64 : InstPTX<(outs RC:$d), (ins LOCALri64:$a),
+ !strconcat("ld.local", !strconcat(typestr, "\t$d, [$a]")),
+ [(set RC:$d, (load_global ADDRlocal64:$a))]>;
+ def STri32 : InstPTX<(outs), (ins RC:$d, LOCALri32:$a),
+ !strconcat("st.local", !strconcat(typestr, "\t[$a], $d")),
+ [(store_global RC:$d, ADDRlocal32:$a)]>;
+ def STri64 : InstPTX<(outs), (ins RC:$d, LOCALri64:$a),
+ !strconcat("st.local", !strconcat(typestr, "\t[$a], $d")),
+ [(store_global RC:$d, ADDRlocal64:$a)]>;
+}
+
+multiclass PTX_PARAM_LD_ST<string typestr, RegisterClass RC> {
+ let hasSideEffects = 1 in {
+ def LDpi : InstPTX<(outs RC:$d), (ins i32imm:$a),
+ !strconcat("ld.param", !strconcat(typestr, "\t$d, [$a]")),
+ [(set RC:$d, (PTXloadparam texternalsym:$a))]>;
+ def STpi : InstPTX<(outs), (ins i32imm:$d, RC:$a),
+ !strconcat("st.param", !strconcat(typestr, "\t[$d], $a")),
+ [(PTXstoreparam texternalsym:$d, RC:$a)]>;
+ }
+}
+
+multiclass PTX_LD_ALL<string opstr, PatFrag pat_load> {
+ defm u16 : PTX_LD<opstr, ".u16", RegI16, pat_load>;
+ defm u32 : PTX_LD<opstr, ".u32", RegI32, pat_load>;
+ defm u64 : PTX_LD<opstr, ".u64", RegI64, pat_load>;
+ defm f32 : PTX_LD<opstr, ".f32", RegF32, pat_load>;
+ defm f64 : PTX_LD<opstr, ".f64", RegF64, pat_load>;
+}
+
+multiclass PTX_ST_ALL<string opstr, PatFrag pat_store> {
+ defm u16 : PTX_ST<opstr, ".u16", RegI16, pat_store>;
+ defm u32 : PTX_ST<opstr, ".u32", RegI32, pat_store>;
+ defm u64 : PTX_ST<opstr, ".u64", RegI64, pat_store>;
+ defm f32 : PTX_ST<opstr, ".f32", RegF32, pat_store>;
+ defm f64 : PTX_ST<opstr, ".f64", RegF64, pat_store>;
+}
+
+
+
+//===----------------------------------------------------------------------===//
+// Instruction definitions for loads/stores
+//===----------------------------------------------------------------------===//
+
+// Global/shared stores
+defm STg : PTX_ST_ALL<"st.global", store_global>;
+defm STs : PTX_ST_ALL<"st.shared", store_shared>;
+
+// Global/shared/constant loads
+defm LDg : PTX_LD_ALL<"ld.global", load_global>;
+defm LDc : PTX_LD_ALL<"ld.const", load_constant>;
+defm LDs : PTX_LD_ALL<"ld.shared", load_shared>;
+
+// Param loads/stores
+defm PARAMPRED : PTX_PARAM_LD_ST<".pred", RegPred>;
+defm PARAMU16 : PTX_PARAM_LD_ST<".u16", RegI16>;
+defm PARAMU32 : PTX_PARAM_LD_ST<".u32", RegI32>;
+defm PARAMU64 : PTX_PARAM_LD_ST<".u64", RegI64>;
+defm PARAMF32 : PTX_PARAM_LD_ST<".f32", RegF32>;
+defm PARAMF64 : PTX_PARAM_LD_ST<".f64", RegF64>;
+
+// Local loads/stores
+defm LOCALPRED : PTX_LOCAL_LD_ST<".pred", RegPred>;
+defm LOCALU16 : PTX_LOCAL_LD_ST<".u16", RegI16>;
+defm LOCALU32 : PTX_LOCAL_LD_ST<".u32", RegI32>;
+defm LOCALU64 : PTX_LOCAL_LD_ST<".u64", RegI64>;
+defm LOCALF32 : PTX_LOCAL_LD_ST<".f32", RegF32>;
+defm LOCALF64 : PTX_LOCAL_LD_ST<".f64", RegF64>;
+
diff --git a/contrib/llvm/lib/Target/PTX/PTXIntrinsicInstrInfo.td b/contrib/llvm/lib/Target/PTX/PTXIntrinsicInstrInfo.td
index 8d97909..9de1cb6 100644
--- a/contrib/llvm/lib/Target/PTX/PTXIntrinsicInstrInfo.td
+++ b/contrib/llvm/lib/Target/PTX/PTXIntrinsicInstrInfo.td
@@ -25,37 +25,63 @@ class PTX_READ_SPECIAL_REGISTER_R32<string regname, Intrinsic intop>
// TODO Add read vector-version of special registers
-//def PTX_READ_TID_R64 : PTX_READ_SPECIAL_REGISTER_R64<"tid", int_ptx_read_tid_r64>;
-def PTX_READ_TID_X : PTX_READ_SPECIAL_REGISTER_R32<"tid.x", int_ptx_read_tid_x>;
-def PTX_READ_TID_Y : PTX_READ_SPECIAL_REGISTER_R32<"tid.y", int_ptx_read_tid_y>;
-def PTX_READ_TID_Z : PTX_READ_SPECIAL_REGISTER_R32<"tid.z", int_ptx_read_tid_z>;
-def PTX_READ_TID_W : PTX_READ_SPECIAL_REGISTER_R32<"tid.w", int_ptx_read_tid_w>;
+//def PTX_READ_TID_R64 : PTX_READ_SPECIAL_REGISTER_R64<"tid",
+// int_ptx_read_tid_r64>;
+def PTX_READ_TID_X : PTX_READ_SPECIAL_REGISTER_R32<"tid.x",
+ int_ptx_read_tid_x>;
+def PTX_READ_TID_Y : PTX_READ_SPECIAL_REGISTER_R32<"tid.y",
+ int_ptx_read_tid_y>;
+def PTX_READ_TID_Z : PTX_READ_SPECIAL_REGISTER_R32<"tid.z",
+ int_ptx_read_tid_z>;
+def PTX_READ_TID_W : PTX_READ_SPECIAL_REGISTER_R32<"tid.w",
+ int_ptx_read_tid_w>;
-//def PTX_READ_NTID_R64 : PTX_READ_SPECIAL_REGISTER_R64<"ntid", int_ptx_read_ntid_r64>;
-def PTX_READ_NTID_X : PTX_READ_SPECIAL_REGISTER_R32<"ntid.x", int_ptx_read_ntid_x>;
-def PTX_READ_NTID_Y : PTX_READ_SPECIAL_REGISTER_R32<"ntid.y", int_ptx_read_ntid_y>;
-def PTX_READ_NTID_Z : PTX_READ_SPECIAL_REGISTER_R32<"ntid.z", int_ptx_read_ntid_z>;
-def PTX_READ_NTID_W : PTX_READ_SPECIAL_REGISTER_R32<"ntid.w", int_ptx_read_ntid_w>;
+//def PTX_READ_NTID_R64 : PTX_READ_SPECIAL_REGISTER_R64<"ntid",
+// int_ptx_read_ntid_r64>;
+def PTX_READ_NTID_X : PTX_READ_SPECIAL_REGISTER_R32<"ntid.x",
+ int_ptx_read_ntid_x>;
+def PTX_READ_NTID_Y : PTX_READ_SPECIAL_REGISTER_R32<"ntid.y",
+ int_ptx_read_ntid_y>;
+def PTX_READ_NTID_Z : PTX_READ_SPECIAL_REGISTER_R32<"ntid.z",
+ int_ptx_read_ntid_z>;
+def PTX_READ_NTID_W : PTX_READ_SPECIAL_REGISTER_R32<"ntid.w",
+ int_ptx_read_ntid_w>;
-def PTX_READ_LANEID : PTX_READ_SPECIAL_REGISTER_R32<"laneid", int_ptx_read_laneid>;
-def PTX_READ_WARPID : PTX_READ_SPECIAL_REGISTER_R32<"warpid", int_ptx_read_warpid>;
-def PTX_READ_NWARPID : PTX_READ_SPECIAL_REGISTER_R32<"nwarpid", int_ptx_read_nwarpid>;
+def PTX_READ_LANEID : PTX_READ_SPECIAL_REGISTER_R32<"laneid",
+ int_ptx_read_laneid>;
+def PTX_READ_WARPID : PTX_READ_SPECIAL_REGISTER_R32<"warpid",
+ int_ptx_read_warpid>;
+def PTX_READ_NWARPID : PTX_READ_SPECIAL_REGISTER_R32<"nwarpid",
+ int_ptx_read_nwarpid>;
-//def PTX_READ_CTAID_R64 : PTX_READ_SPECIAL_REGISTER_R64<"ctaid", int_ptx_read_ctaid_r64>;
-def PTX_READ_CTAID_X : PTX_READ_SPECIAL_REGISTER_R32<"ctaid.x", int_ptx_read_ctaid_x>;
-def PTX_READ_CTAID_Y : PTX_READ_SPECIAL_REGISTER_R32<"ctaid.y", int_ptx_read_ctaid_y>;
-def PTX_READ_CTAID_Z : PTX_READ_SPECIAL_REGISTER_R32<"ctaid.z", int_ptx_read_ctaid_z>;
-def PTX_READ_CTAID_W : PTX_READ_SPECIAL_REGISTER_R32<"ctaid.w", int_ptx_read_ctaid_w>;
+//def PTX_READ_CTAID_R64 :
+//PTX_READ_SPECIAL_REGISTER_R64<"ctaid", int_ptx_read_ctaid_r64>;
+def PTX_READ_CTAID_X : PTX_READ_SPECIAL_REGISTER_R32<"ctaid.x",
+ int_ptx_read_ctaid_x>;
+def PTX_READ_CTAID_Y : PTX_READ_SPECIAL_REGISTER_R32<"ctaid.y",
+ int_ptx_read_ctaid_y>;
+def PTX_READ_CTAID_Z : PTX_READ_SPECIAL_REGISTER_R32<"ctaid.z",
+ int_ptx_read_ctaid_z>;
+def PTX_READ_CTAID_W : PTX_READ_SPECIAL_REGISTER_R32<"ctaid.w",
+ int_ptx_read_ctaid_w>;
-//def PTX_READ_NCTAID_R64 : PTX_READ_SPECIAL_REGISTER_R64<"nctaid", int_ptx_read_nctaid_r64>;
-def PTX_READ_NCTAID_X : PTX_READ_SPECIAL_REGISTER_R32<"nctaid.x", int_ptx_read_nctaid_x>;
-def PTX_READ_NCTAID_Y : PTX_READ_SPECIAL_REGISTER_R32<"nctaid.y", int_ptx_read_nctaid_y>;
-def PTX_READ_NCTAID_Z : PTX_READ_SPECIAL_REGISTER_R32<"nctaid.z", int_ptx_read_nctaid_z>;
-def PTX_READ_NCTAID_W : PTX_READ_SPECIAL_REGISTER_R32<"nctaid.w", int_ptx_read_nctaid_w>;
+//def PTX_READ_NCTAID_R64 :
+//PTX_READ_SPECIAL_REGISTER_R64<"nctaid", int_ptx_read_nctaid_r64>;
+def PTX_READ_NCTAID_X : PTX_READ_SPECIAL_REGISTER_R32<"nctaid.x",
+ int_ptx_read_nctaid_x>;
+def PTX_READ_NCTAID_Y : PTX_READ_SPECIAL_REGISTER_R32<"nctaid.y",
+ int_ptx_read_nctaid_y>;
+def PTX_READ_NCTAID_Z : PTX_READ_SPECIAL_REGISTER_R32<"nctaid.z",
+ int_ptx_read_nctaid_z>;
+def PTX_READ_NCTAID_W : PTX_READ_SPECIAL_REGISTER_R32<"nctaid.w",
+ int_ptx_read_nctaid_w>;
-def PTX_READ_SMID : PTX_READ_SPECIAL_REGISTER_R32<"smid", int_ptx_read_smid>;
-def PTX_READ_NSMID : PTX_READ_SPECIAL_REGISTER_R32<"nsmid", int_ptx_read_nsmid>;
-def PTX_READ_GRIDID : PTX_READ_SPECIAL_REGISTER_R32<"gridid", int_ptx_read_gridid>;
+def PTX_READ_SMID : PTX_READ_SPECIAL_REGISTER_R32<"smid",
+ int_ptx_read_smid>;
+def PTX_READ_NSMID : PTX_READ_SPECIAL_REGISTER_R32<"nsmid",
+ int_ptx_read_nsmid>;
+def PTX_READ_GRIDID : PTX_READ_SPECIAL_REGISTER_R32<"gridid",
+ int_ptx_read_gridid>;
def PTX_READ_LANEMASK_EQ
: PTX_READ_SPECIAL_REGISTER_R32<"lanemask_eq", int_ptx_read_lanemask_eq>;
diff --git a/contrib/llvm/lib/Target/PTX/PTXMCAsmStreamer.cpp b/contrib/llvm/lib/Target/PTX/PTXMCAsmStreamer.cpp
index b13a3da..468ce93 100644
--- a/contrib/llvm/lib/Target/PTX/PTXMCAsmStreamer.cpp
+++ b/contrib/llvm/lib/Target/PTX/PTXMCAsmStreamer.cpp
@@ -100,7 +100,7 @@ public:
/// @{
virtual void ChangeSection(const MCSection *Section);
- virtual void InitSections() {}
+ virtual void InitSections() { /* PTX does not use sections */ }
virtual void EmitLabel(MCSymbol *Symbol);
@@ -132,7 +132,9 @@ public:
///
/// @param Symbol - The common symbol to emit.
/// @param Size - The size of the common symbol.
- virtual void EmitLocalCommonSymbol(MCSymbol *Symbol, uint64_t Size);
+ /// @param ByteAlignment - The alignment of the common symbol in bytes.
+ virtual void EmitLocalCommonSymbol(MCSymbol *Symbol, uint64_t Size,
+ unsigned ByteAlignment);
virtual void EmitZerofill(const MCSection *Section, MCSymbol *Symbol = 0,
unsigned Size = 0, unsigned ByteAlignment = 0);
@@ -233,7 +235,7 @@ void PTXMCAsmStreamer::ChangeSection(const MCSection *Section) {
void PTXMCAsmStreamer::EmitLabel(MCSymbol *Symbol) {
assert(Symbol->isUndefined() && "Cannot define a symbol twice!");
assert(!Symbol->isVariable() && "Cannot emit a variable symbol!");
- //assert(getCurrentSection() && "Cannot emit before setting section!");
+ assert(getCurrentSection() && "Cannot emit before setting section!");
OS << *Symbol << MAI.getLabelSuffix();
EmitEOL();
@@ -283,7 +285,8 @@ void PTXMCAsmStreamer::EmitELFSize(MCSymbol *Symbol, const MCExpr *Value) {}
void PTXMCAsmStreamer::EmitCommonSymbol(MCSymbol *Symbol, uint64_t Size,
unsigned ByteAlignment) {}
-void PTXMCAsmStreamer::EmitLocalCommonSymbol(MCSymbol *Symbol, uint64_t Size) {}
+void PTXMCAsmStreamer::EmitLocalCommonSymbol(MCSymbol *Symbol, uint64_t Size,
+ unsigned ByteAlignment) {}
void PTXMCAsmStreamer::EmitZerofill(const MCSection *Section, MCSymbol *Symbol,
unsigned Size, unsigned ByteAlignment) {}
@@ -510,7 +513,7 @@ void PTXMCAsmStreamer::EmitInstruction(const MCInst &Inst) {
// If we have an AsmPrinter, use that to print, otherwise print the MCInst.
if (InstPrinter)
- InstPrinter->printInst(&Inst, OS);
+ InstPrinter->printInst(&Inst, OS, "");
else
Inst.print(OS, &MAI);
EmitEOL();
@@ -533,7 +536,7 @@ namespace llvm {
formatted_raw_ostream &OS,
bool isVerboseAsm, bool useLoc, bool useCFI,
MCInstPrinter *IP,
- MCCodeEmitter *CE, TargetAsmBackend *TAB,
+ MCCodeEmitter *CE, MCAsmBackend *MAB,
bool ShowInst) {
return new PTXMCAsmStreamer(Context, OS, isVerboseAsm, useLoc,
IP, CE, ShowInst);
diff --git a/contrib/llvm/lib/Target/PTX/PTXMCInstLower.cpp b/contrib/llvm/lib/Target/PTX/PTXMCInstLower.cpp
new file mode 100644
index 0000000..142e639
--- /dev/null
+++ b/contrib/llvm/lib/Target/PTX/PTXMCInstLower.cpp
@@ -0,0 +1,32 @@
+//===-- PTXMCInstLower.cpp - Convert PTX MachineInstr to an MCInst --------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains code to lower PTX MachineInstrs to their corresponding
+// MCInst records.
+//
+//===----------------------------------------------------------------------===//
+
+#include "PTX.h"
+#include "PTXAsmPrinter.h"
+#include "llvm/Constants.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/MC/MCExpr.h"
+#include "llvm/MC/MCInst.h"
+#include "llvm/Target/Mangler.h"
+
+void llvm::LowerPTXMachineInstrToMCInst(const MachineInstr *MI, MCInst &OutMI,
+ PTXAsmPrinter &AP) {
+ OutMI.setOpcode(MI->getOpcode());
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+ const MachineOperand &MO = MI->getOperand(i);
+ MCOperand MCOp;
+ OutMI.addOperand(AP.lowerOperand(MO));
+ }
+}
+
diff --git a/contrib/llvm/lib/Target/PTX/PTXMFInfoExtract.cpp b/contrib/llvm/lib/Target/PTX/PTXMFInfoExtract.cpp
index 6fe9e6c..b33a273 100644
--- a/contrib/llvm/lib/Target/PTX/PTXMFInfoExtract.cpp
+++ b/contrib/llvm/lib/Target/PTX/PTXMFInfoExtract.cpp
@@ -52,36 +52,12 @@ bool PTXMFInfoExtract::runOnMachineFunction(MachineFunction &MF) {
PTXMachineFunctionInfo *MFI = MF.getInfo<PTXMachineFunctionInfo>();
MachineRegisterInfo &MRI = MF.getRegInfo();
- DEBUG(dbgs() << "******** PTX FUNCTION LOCAL VAR REG DEF ********\n");
-
- DEBUG(dbgs()
- << "PTX::NoRegister == " << PTX::NoRegister << "\n"
- << "PTX::NUM_TARGET_REGS == " << PTX::NUM_TARGET_REGS << "\n");
-
- DEBUG(for (unsigned reg = PTX::NoRegister + 1;
- reg < PTX::NUM_TARGET_REGS; ++reg)
- if (MRI.isPhysRegUsed(reg))
- dbgs() << "Used Reg: " << reg << "\n";);
-
- // FIXME: This is a slow linear scanning
- for (unsigned reg = PTX::NoRegister + 1; reg < PTX::NUM_TARGET_REGS; ++reg)
- if (MRI.isPhysRegUsed(reg) &&
- !MFI->isRetReg(reg) &&
- (MFI->isKernel() || !MFI->isArgReg(reg)))
- MFI->addLocalVarReg(reg);
-
- // Notify MachineFunctionInfo that I've done adding local var reg
- MFI->doneAddLocalVar();
-
- DEBUG(for (PTXMachineFunctionInfo::reg_iterator
- i = MFI->argRegBegin(), e = MFI->argRegEnd();
- i != e; ++i)
- dbgs() << "Arg Reg: " << *i << "\n";);
-
- DEBUG(for (PTXMachineFunctionInfo::reg_iterator
- i = MFI->localVarRegBegin(), e = MFI->localVarRegEnd();
- i != e; ++i)
- dbgs() << "Local Var Reg: " << *i << "\n";);
+ // Generate list of all virtual registers used in this function
+ for (unsigned i = 0; i < MRI.getNumVirtRegs(); ++i) {
+ unsigned Reg = TargetRegisterInfo::index2VirtReg(i);
+ const TargetRegisterClass *TRC = MRI.getRegClass(Reg);
+ MFI->addVirtualRegister(TRC, Reg);
+ }
return false;
}
diff --git a/contrib/llvm/lib/Target/PTX/PTXMachineFunctionInfo.h b/contrib/llvm/lib/Target/PTX/PTXMachineFunctionInfo.h
index 9d65f5b..3b985f7 100644
--- a/contrib/llvm/lib/Target/PTX/PTXMachineFunctionInfo.h
+++ b/contrib/llvm/lib/Target/PTX/PTXMachineFunctionInfo.h
@@ -15,75 +15,148 @@
#define PTX_MACHINE_FUNCTION_INFO_H
#include "PTX.h"
+#include "PTXParamManager.h"
+#include "PTXRegisterInfo.h"
+#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/DenseSet.h"
+#include "llvm/ADT/StringExtras.h"
#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
namespace llvm {
+
/// PTXMachineFunctionInfo - This class is derived from MachineFunction and
/// contains private PTX target-specific information for each MachineFunction.
///
class PTXMachineFunctionInfo : public MachineFunctionInfo {
private:
- bool is_kernel;
- std::vector<unsigned> reg_arg, reg_local_var;
- std::vector<unsigned> reg_ret;
- bool _isDoneAddArg;
+ bool IsKernel;
+ DenseSet<unsigned> RegArgs;
+ DenseSet<unsigned> RegRets;
+
+ typedef std::vector<unsigned> RegisterList;
+ typedef DenseMap<const TargetRegisterClass*, RegisterList> RegisterMap;
+ typedef DenseMap<unsigned, std::string> RegisterNameMap;
+ typedef DenseMap<int, std::string> FrameMap;
+
+ RegisterMap UsedRegs;
+ RegisterNameMap RegNames;
+ FrameMap FrameSymbols;
+
+ PTXParamManager ParamManager;
public:
+ typedef DenseSet<unsigned>::const_iterator reg_iterator;
+
PTXMachineFunctionInfo(MachineFunction &MF)
- : is_kernel(false), reg_ret(PTX::NoRegister), _isDoneAddArg(false) {
- reg_arg.reserve(8);
- reg_local_var.reserve(32);
+ : IsKernel(false) {
+ UsedRegs[PTX::RegPredRegisterClass] = RegisterList();
+ UsedRegs[PTX::RegI16RegisterClass] = RegisterList();
+ UsedRegs[PTX::RegI32RegisterClass] = RegisterList();
+ UsedRegs[PTX::RegI64RegisterClass] = RegisterList();
+ UsedRegs[PTX::RegF32RegisterClass] = RegisterList();
+ UsedRegs[PTX::RegF64RegisterClass] = RegisterList();
}
- void setKernel(bool _is_kernel=true) { is_kernel = _is_kernel; }
-
- void addArgReg(unsigned reg) { reg_arg.push_back(reg); }
- void addLocalVarReg(unsigned reg) { reg_local_var.push_back(reg); }
- void addRetReg(unsigned reg) {
- if (!isRetReg(reg)) {
- reg_ret.push_back(reg);
+ /// getParamManager - Returns the PTXParamManager instance for this function.
+ PTXParamManager& getParamManager() { return ParamManager; }
+ const PTXParamManager& getParamManager() const { return ParamManager; }
+
+ /// setKernel/isKernel - Gets/sets a flag that indicates if this function is
+ /// a PTX kernel function.
+ void setKernel(bool _IsKernel=true) { IsKernel = _IsKernel; }
+ bool isKernel() const { return IsKernel; }
+
+ /// argreg_begin/argreg_end - Returns iterators to the set of registers
+ /// containing function arguments.
+ reg_iterator argreg_begin() const { return RegArgs.begin(); }
+ reg_iterator argreg_end() const { return RegArgs.end(); }
+
+ /// retreg_begin/retreg_end - Returns iterators to the set of registers
+ /// containing the function return values.
+ reg_iterator retreg_begin() const { return RegRets.begin(); }
+ reg_iterator retreg_end() const { return RegRets.end(); }
+
+ /// addRetReg - Adds a register to the set of return-value registers.
+ void addRetReg(unsigned Reg) {
+ if (!RegRets.count(Reg)) {
+ RegRets.insert(Reg);
+ std::string name;
+ name = "%ret";
+ name += utostr(RegRets.size() - 1);
+ RegNames[Reg] = name;
}
}
- void doneAddArg(void) {
- _isDoneAddArg = true;
+ /// addArgReg - Adds a register to the set of function argument registers.
+ void addArgReg(unsigned Reg) {
+ RegArgs.insert(Reg);
+ std::string name;
+ name = "%param";
+ name += utostr(RegArgs.size() - 1);
+ RegNames[Reg] = name;
}
- void doneAddLocalVar(void) {}
-
- bool isKernel() const { return is_kernel; }
- typedef std::vector<unsigned>::const_iterator reg_iterator;
- typedef std::vector<unsigned>::const_reverse_iterator reg_reverse_iterator;
- typedef std::vector<unsigned>::const_iterator ret_iterator;
-
- bool argRegEmpty() const { return reg_arg.empty(); }
- int getNumArg() const { return reg_arg.size(); }
- reg_iterator argRegBegin() const { return reg_arg.begin(); }
- reg_iterator argRegEnd() const { return reg_arg.end(); }
- reg_reverse_iterator argRegReverseBegin() const { return reg_arg.rbegin(); }
- reg_reverse_iterator argRegReverseEnd() const { return reg_arg.rend(); }
-
- bool localVarRegEmpty() const { return reg_local_var.empty(); }
- reg_iterator localVarRegBegin() const { return reg_local_var.begin(); }
- reg_iterator localVarRegEnd() const { return reg_local_var.end(); }
-
- bool retRegEmpty() const { return reg_ret.empty(); }
- int getNumRet() const { return reg_ret.size(); }
- ret_iterator retRegBegin() const { return reg_ret.begin(); }
- ret_iterator retRegEnd() const { return reg_ret.end(); }
+ /// addVirtualRegister - Adds a virtual register to the set of all used
+ /// registers in the function.
+ void addVirtualRegister(const TargetRegisterClass *TRC, unsigned Reg) {
+ std::string name;
+
+ // Do not count registers that are argument/return registers.
+ if (!RegRets.count(Reg) && !RegArgs.count(Reg)) {
+ UsedRegs[TRC].push_back(Reg);
+ if (TRC == PTX::RegPredRegisterClass)
+ name = "%p";
+ else if (TRC == PTX::RegI16RegisterClass)
+ name = "%rh";
+ else if (TRC == PTX::RegI32RegisterClass)
+ name = "%r";
+ else if (TRC == PTX::RegI64RegisterClass)
+ name = "%rd";
+ else if (TRC == PTX::RegF32RegisterClass)
+ name = "%f";
+ else if (TRC == PTX::RegF64RegisterClass)
+ name = "%fd";
+ else
+ llvm_unreachable("Invalid register class");
+
+ name += utostr(UsedRegs[TRC].size() - 1);
+ RegNames[Reg] = name;
+ }
+ }
- bool isArgReg(unsigned reg) const {
- return std::find(reg_arg.begin(), reg_arg.end(), reg) != reg_arg.end();
+ /// getRegisterName - Returns the name of the specified virtual register. This
+ /// name is used during PTX emission.
+ const char *getRegisterName(unsigned Reg) const {
+ if (RegNames.count(Reg))
+ return RegNames.find(Reg)->second.c_str();
+ else if (Reg == PTX::NoRegister)
+ return "%noreg";
+ else
+ llvm_unreachable("Register not in register name map");
}
- bool isRetReg(unsigned reg) const {
- return std::find(reg_ret.begin(), reg_ret.end(), reg) != reg_ret.end();
+ /// getNumRegistersForClass - Returns the number of virtual registers that are
+ /// used for the specified register class.
+ unsigned getNumRegistersForClass(const TargetRegisterClass *TRC) const {
+ return UsedRegs.lookup(TRC).size();
}
- bool isLocalVarReg(unsigned reg) const {
- return std::find(reg_local_var.begin(), reg_local_var.end(), reg)
- != reg_local_var.end();
+ /// getFrameSymbol - Returns the symbol name for the given FrameIndex.
+ const char* getFrameSymbol(int FrameIndex) {
+ if (FrameSymbols.count(FrameIndex)) {
+ return FrameSymbols.lookup(FrameIndex).c_str();
+ } else {
+ std::string Name = "__local";
+ Name += utostr(FrameIndex);
+ // The whole point of caching this name is to ensure the pointer we pass
+ // to any getExternalSymbol() calls will remain valid for the lifetime of
+ // the back-end instance. This is to work around an issue in SelectionDAG
+ // where symbol names are expected to be life-long strings.
+ FrameSymbols[FrameIndex] = Name;
+ return FrameSymbols[FrameIndex].c_str();
+ }
}
}; // class PTXMachineFunctionInfo
} // namespace llvm
diff --git a/contrib/llvm/lib/Target/PTX/PTXParamManager.cpp b/contrib/llvm/lib/Target/PTX/PTXParamManager.cpp
new file mode 100644
index 0000000..7753787
--- /dev/null
+++ b/contrib/llvm/lib/Target/PTX/PTXParamManager.cpp
@@ -0,0 +1,73 @@
+//===- PTXParamManager.cpp - Manager for .param variables -------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the PTXParamManager class.
+//
+//===----------------------------------------------------------------------===//
+
+#include "PTX.h"
+#include "PTXParamManager.h"
+#include "llvm/ADT/StringExtras.h"
+
+using namespace llvm;
+
+PTXParamManager::PTXParamManager() {
+}
+
+unsigned PTXParamManager::addArgumentParam(unsigned Size) {
+ PTXParam Param;
+ Param.Type = PTX_PARAM_TYPE_ARGUMENT;
+ Param.Size = Size;
+
+ std::string Name;
+ Name = "__param_";
+ Name += utostr(ArgumentParams.size()+1);
+ Param.Name = Name;
+
+ unsigned Index = AllParams.size();
+ AllParams[Index] = Param;
+ ArgumentParams.push_back(Index);
+
+ return Index;
+}
+
+unsigned PTXParamManager::addReturnParam(unsigned Size) {
+ PTXParam Param;
+ Param.Type = PTX_PARAM_TYPE_RETURN;
+ Param.Size = Size;
+
+ std::string Name;
+ Name = "__ret_";
+ Name += utostr(ReturnParams.size()+1);
+ Param.Name = Name;
+
+ unsigned Index = AllParams.size();
+ AllParams[Index] = Param;
+ ReturnParams.push_back(Index);
+
+ return Index;
+}
+
+unsigned PTXParamManager::addLocalParam(unsigned Size) {
+ PTXParam Param;
+ Param.Type = PTX_PARAM_TYPE_LOCAL;
+ Param.Size = Size;
+
+ std::string Name;
+ Name = "__localparam_";
+ Name += utostr(LocalParams.size()+1);
+ Param.Name = Name;
+
+ unsigned Index = AllParams.size();
+ AllParams[Index] = Param;
+ LocalParams.push_back(Index);
+
+ return Index;
+}
+
diff --git a/contrib/llvm/lib/Target/PTX/PTXParamManager.h b/contrib/llvm/lib/Target/PTX/PTXParamManager.h
new file mode 100644
index 0000000..9fd2de5
--- /dev/null
+++ b/contrib/llvm/lib/Target/PTX/PTXParamManager.h
@@ -0,0 +1,86 @@
+//===- PTXParamManager.h - Manager for .param variables ----------*- C++ -*-==//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the PTXParamManager class, which manages all defined .param
+// variables for a particular function.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef PTX_PARAM_MANAGER_H
+#define PTX_PARAM_MANAGER_H
+
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/SmallVector.h"
+
+namespace llvm {
+
+/// PTXParamManager - This class manages all .param variables defined for a
+/// particular function.
+class PTXParamManager {
+private:
+
+ /// PTXParamType - Type of a .param variable
+ enum PTXParamType {
+ PTX_PARAM_TYPE_ARGUMENT,
+ PTX_PARAM_TYPE_RETURN,
+ PTX_PARAM_TYPE_LOCAL
+ };
+
+ /// PTXParam - Definition of a PTX .param variable
+ struct PTXParam {
+ PTXParamType Type;
+ unsigned Size;
+ std::string Name;
+ };
+
+ DenseMap<unsigned, PTXParam> AllParams;
+ SmallVector<unsigned, 4> ArgumentParams;
+ SmallVector<unsigned, 4> ReturnParams;
+ SmallVector<unsigned, 4> LocalParams;
+
+public:
+
+ typedef SmallVector<unsigned, 4>::const_iterator param_iterator;
+
+ PTXParamManager();
+
+ param_iterator arg_begin() const { return ArgumentParams.begin(); }
+ param_iterator arg_end() const { return ArgumentParams.end(); }
+ param_iterator ret_begin() const { return ReturnParams.begin(); }
+ param_iterator ret_end() const { return ReturnParams.end(); }
+ param_iterator local_begin() const { return LocalParams.begin(); }
+ param_iterator local_end() const { return LocalParams.end(); }
+
+ /// addArgumentParam - Returns a new .param used as an argument.
+ unsigned addArgumentParam(unsigned Size);
+
+ /// addReturnParam - Returns a new .param used as a return argument.
+ unsigned addReturnParam(unsigned Size);
+
+ /// addLocalParam - Returns a new .param used as a local .param variable.
+ unsigned addLocalParam(unsigned Size);
+
+ /// getParamName - Returns the name of the parameter as a string.
+ const std::string &getParamName(unsigned Param) const {
+ assert(AllParams.count(Param) == 1 && "Param has not been defined!");
+ return AllParams.find(Param)->second.Name;
+ }
+
+ /// getParamSize - Returns the size of the parameter in bits.
+ unsigned getParamSize(unsigned Param) const {
+ assert(AllParams.count(Param) == 1 && "Param has not been defined!");
+ return AllParams.find(Param)->second.Size;
+ }
+
+};
+
+}
+
+#endif
+
diff --git a/contrib/llvm/lib/Target/PTX/PTXRegAlloc.cpp b/contrib/llvm/lib/Target/PTX/PTXRegAlloc.cpp
new file mode 100644
index 0000000..2d2d5c3
--- /dev/null
+++ b/contrib/llvm/lib/Target/PTX/PTXRegAlloc.cpp
@@ -0,0 +1,58 @@
+//===-- PTXRegAlloc.cpp - PTX Register Allocator --------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains a register allocator for PTX code.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "ptx-reg-alloc"
+
+#include "PTX.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/RegAllocRegistry.h"
+
+using namespace llvm;
+
+namespace {
+ // Special register allocator for PTX.
+ class PTXRegAlloc : public MachineFunctionPass {
+ public:
+ static char ID;
+ PTXRegAlloc() : MachineFunctionPass(ID) {
+ initializePHIEliminationPass(*PassRegistry::getPassRegistry());
+ initializeTwoAddressInstructionPassPass(*PassRegistry::getPassRegistry());
+ }
+
+ virtual const char* getPassName() const {
+ return "PTX Register Allocator";
+ }
+
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesCFG();
+ AU.addRequiredID(PHIEliminationID);
+ AU.addRequiredID(TwoAddressInstructionPassID);
+ MachineFunctionPass::getAnalysisUsage(AU);
+ }
+
+ virtual bool runOnMachineFunction(MachineFunction &MF) {
+ // We do not actually do anything (at least not yet).
+ return false;
+ }
+ };
+
+ char PTXRegAlloc::ID = 0;
+
+ static RegisterRegAlloc
+ ptxRegAlloc("ptx", "PTX register allocator", createPTXRegisterAllocator);
+}
+
+FunctionPass *llvm::createPTXRegisterAllocator() {
+ return new PTXRegAlloc();
+}
+
diff --git a/contrib/llvm/lib/Target/PTX/PTXRegisterInfo.cpp b/contrib/llvm/lib/Target/PTX/PTXRegisterInfo.cpp
index cb56ea9..c806266 100644
--- a/contrib/llvm/lib/Target/PTX/PTXRegisterInfo.cpp
+++ b/contrib/llvm/lib/Target/PTX/PTXRegisterInfo.cpp
@@ -14,6 +14,9 @@
#include "PTX.h"
#include "PTXRegisterInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
@@ -23,15 +26,23 @@
using namespace llvm;
PTXRegisterInfo::PTXRegisterInfo(PTXTargetMachine &TM,
- const TargetInstrInfo &TII)
- : PTXGenRegisterInfo() {
+ const TargetInstrInfo &tii)
+ // PTX does not have a return address register.
+ : PTXGenRegisterInfo(0), TII(tii) {
}
void PTXRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
int SPAdj,
RegScavenger *RS) const {
unsigned Index;
- MachineInstr& MI = *II;
+ MachineInstr &MI = *II;
+ //MachineBasicBlock &MBB = *MI.getParent();
+ //DebugLoc dl = MI.getDebugLoc();
+ //MachineRegisterInfo &MRI = MI.getParent()->getParent()->getRegInfo();
+
+ //unsigned Reg = MRI.createVirtualRegister(PTX::RegF32RegisterClass);
+
+ llvm_unreachable("FrameIndex should have been previously eliminated!");
Index = 0;
while (!MI.getOperand(Index).isFI()) {
@@ -46,6 +57,18 @@ void PTXRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
DEBUG(dbgs() << "- SPAdj: " << SPAdj << "\n");
DEBUG(dbgs() << "- FrameIndex: " << FrameIndex << "\n");
+ //MachineInstr* MI2 = BuildMI(MBB, II, dl, TII.get(PTX::LOAD_LOCAL_F32))
+ //.addReg(Reg, RegState::Define).addImm(FrameIndex);
+ //if (MI2->findFirstPredOperandIdx() == -1) {
+ // MI2->addOperand(MachineOperand::CreateReg(PTX::NoRegister, /*IsDef=*/false));
+ // MI2->addOperand(MachineOperand::CreateImm(PTX::PRED_NORMAL));
+ //}
+ //MI2->dump();
+
+ //MachineOperand ESOp = MachineOperand::CreateES("__local__");
+
// This frame index is post stack slot re-use assignments
+ //MI.getOperand(Index).ChangeToRegister(Reg, false);
MI.getOperand(Index).ChangeToImmediate(FrameIndex);
+ //MI.getOperand(Index) = ESOp;
}
diff --git a/contrib/llvm/lib/Target/PTX/PTXRegisterInfo.h b/contrib/llvm/lib/Target/PTX/PTXRegisterInfo.h
index 0b63cb6..55fafe4 100644
--- a/contrib/llvm/lib/Target/PTX/PTXRegisterInfo.h
+++ b/contrib/llvm/lib/Target/PTX/PTXRegisterInfo.h
@@ -25,8 +25,12 @@ class PTXTargetMachine;
class MachineFunction;
struct PTXRegisterInfo : public PTXGenRegisterInfo {
+private:
+ const TargetInstrInfo &TII;
+
+public:
PTXRegisterInfo(PTXTargetMachine &TM,
- const TargetInstrInfo &TII);
+ const TargetInstrInfo &tii);
virtual const unsigned
*getCalleeSavedRegs(const MachineFunction *MF = 0) const {
@@ -47,18 +51,6 @@ struct PTXRegisterInfo : public PTXGenRegisterInfo {
llvm_unreachable("PTX does not have a frame register");
return 0;
}
-
- virtual unsigned getRARegister() const {
- llvm_unreachable("PTX does not have a return address register");
- return 0;
- }
-
- virtual int getDwarfRegNum(unsigned RegNum, bool isEH) const {
- return PTXGenRegisterInfo::getDwarfRegNumFull(RegNum, 0);
- }
- virtual int getLLVMRegNum(unsigned RegNum, bool isEH) const {
- return PTXGenRegisterInfo::getLLVMRegNumFull(RegNum, 0);
- }
}; // struct PTXRegisterInfo
} // namespace llvm
diff --git a/contrib/llvm/lib/Target/PTX/PTXRegisterInfo.td b/contrib/llvm/lib/Target/PTX/PTXRegisterInfo.td
index 1313d24..6ed6d3f 100644
--- a/contrib/llvm/lib/Target/PTX/PTXRegisterInfo.td
+++ b/contrib/llvm/lib/Target/PTX/PTXRegisterInfo.td
@@ -20,536 +20,18 @@ class PTXReg<string n> : Register<n> {
// Registers
//===----------------------------------------------------------------------===//
-///===- Predicate Registers -----------------------------------------------===//
-
-def P0 : PTXReg<"p0">;
-def P1 : PTXReg<"p1">;
-def P2 : PTXReg<"p2">;
-def P3 : PTXReg<"p3">;
-def P4 : PTXReg<"p4">;
-def P5 : PTXReg<"p5">;
-def P6 : PTXReg<"p6">;
-def P7 : PTXReg<"p7">;
-def P8 : PTXReg<"p8">;
-def P9 : PTXReg<"p9">;
-def P10 : PTXReg<"p10">;
-def P11 : PTXReg<"p11">;
-def P12 : PTXReg<"p12">;
-def P13 : PTXReg<"p13">;
-def P14 : PTXReg<"p14">;
-def P15 : PTXReg<"p15">;
-def P16 : PTXReg<"p16">;
-def P17 : PTXReg<"p17">;
-def P18 : PTXReg<"p18">;
-def P19 : PTXReg<"p19">;
-def P20 : PTXReg<"p20">;
-def P21 : PTXReg<"p21">;
-def P22 : PTXReg<"p22">;
-def P23 : PTXReg<"p23">;
-def P24 : PTXReg<"p24">;
-def P25 : PTXReg<"p25">;
-def P26 : PTXReg<"p26">;
-def P27 : PTXReg<"p27">;
-def P28 : PTXReg<"p28">;
-def P29 : PTXReg<"p29">;
-def P30 : PTXReg<"p30">;
-def P31 : PTXReg<"p31">;
-def P32 : PTXReg<"p32">;
-def P33 : PTXReg<"p33">;
-def P34 : PTXReg<"p34">;
-def P35 : PTXReg<"p35">;
-def P36 : PTXReg<"p36">;
-def P37 : PTXReg<"p37">;
-def P38 : PTXReg<"p38">;
-def P39 : PTXReg<"p39">;
-def P40 : PTXReg<"p40">;
-def P41 : PTXReg<"p41">;
-def P42 : PTXReg<"p42">;
-def P43 : PTXReg<"p43">;
-def P44 : PTXReg<"p44">;
-def P45 : PTXReg<"p45">;
-def P46 : PTXReg<"p46">;
-def P47 : PTXReg<"p47">;
-def P48 : PTXReg<"p48">;
-def P49 : PTXReg<"p49">;
-def P50 : PTXReg<"p50">;
-def P51 : PTXReg<"p51">;
-def P52 : PTXReg<"p52">;
-def P53 : PTXReg<"p53">;
-def P54 : PTXReg<"p54">;
-def P55 : PTXReg<"p55">;
-def P56 : PTXReg<"p56">;
-def P57 : PTXReg<"p57">;
-def P58 : PTXReg<"p58">;
-def P59 : PTXReg<"p59">;
-def P60 : PTXReg<"p60">;
-def P61 : PTXReg<"p61">;
-def P62 : PTXReg<"p62">;
-def P63 : PTXReg<"p63">;
-def P64 : PTXReg<"p64">;
-def P65 : PTXReg<"p65">;
-def P66 : PTXReg<"p66">;
-def P67 : PTXReg<"p67">;
-def P68 : PTXReg<"p68">;
-def P69 : PTXReg<"p69">;
-def P70 : PTXReg<"p70">;
-def P71 : PTXReg<"p71">;
-def P72 : PTXReg<"p72">;
-def P73 : PTXReg<"p73">;
-def P74 : PTXReg<"p74">;
-def P75 : PTXReg<"p75">;
-def P76 : PTXReg<"p76">;
-def P77 : PTXReg<"p77">;
-def P78 : PTXReg<"p78">;
-def P79 : PTXReg<"p79">;
-def P80 : PTXReg<"p80">;
-def P81 : PTXReg<"p81">;
-def P82 : PTXReg<"p82">;
-def P83 : PTXReg<"p83">;
-def P84 : PTXReg<"p84">;
-def P85 : PTXReg<"p85">;
-def P86 : PTXReg<"p86">;
-def P87 : PTXReg<"p87">;
-def P88 : PTXReg<"p88">;
-def P89 : PTXReg<"p89">;
-def P90 : PTXReg<"p90">;
-def P91 : PTXReg<"p91">;
-def P92 : PTXReg<"p92">;
-def P93 : PTXReg<"p93">;
-def P94 : PTXReg<"p94">;
-def P95 : PTXReg<"p95">;
-def P96 : PTXReg<"p96">;
-def P97 : PTXReg<"p97">;
-def P98 : PTXReg<"p98">;
-def P99 : PTXReg<"p99">;
-def P100 : PTXReg<"p100">;
-def P101 : PTXReg<"p101">;
-def P102 : PTXReg<"p102">;
-def P103 : PTXReg<"p103">;
-def P104 : PTXReg<"p104">;
-def P105 : PTXReg<"p105">;
-def P106 : PTXReg<"p106">;
-def P107 : PTXReg<"p107">;
-def P108 : PTXReg<"p108">;
-def P109 : PTXReg<"p109">;
-def P110 : PTXReg<"p110">;
-def P111 : PTXReg<"p111">;
-def P112 : PTXReg<"p112">;
-def P113 : PTXReg<"p113">;
-def P114 : PTXReg<"p114">;
-def P115 : PTXReg<"p115">;
-def P116 : PTXReg<"p116">;
-def P117 : PTXReg<"p117">;
-def P118 : PTXReg<"p118">;
-def P119 : PTXReg<"p119">;
-def P120 : PTXReg<"p120">;
-def P121 : PTXReg<"p121">;
-def P122 : PTXReg<"p122">;
-def P123 : PTXReg<"p123">;
-def P124 : PTXReg<"p124">;
-def P125 : PTXReg<"p125">;
-def P126 : PTXReg<"p126">;
-def P127 : PTXReg<"p127">;
-
-///===- 16-Bit Registers --------------------------------------------------===//
-
-def RH0 : PTXReg<"rh0">;
-def RH1 : PTXReg<"rh1">;
-def RH2 : PTXReg<"rh2">;
-def RH3 : PTXReg<"rh3">;
-def RH4 : PTXReg<"rh4">;
-def RH5 : PTXReg<"rh5">;
-def RH6 : PTXReg<"rh6">;
-def RH7 : PTXReg<"rh7">;
-def RH8 : PTXReg<"rh8">;
-def RH9 : PTXReg<"rh9">;
-def RH10 : PTXReg<"rh10">;
-def RH11 : PTXReg<"rh11">;
-def RH12 : PTXReg<"rh12">;
-def RH13 : PTXReg<"rh13">;
-def RH14 : PTXReg<"rh14">;
-def RH15 : PTXReg<"rh15">;
-def RH16 : PTXReg<"rh16">;
-def RH17 : PTXReg<"rh17">;
-def RH18 : PTXReg<"rh18">;
-def RH19 : PTXReg<"rh19">;
-def RH20 : PTXReg<"rh20">;
-def RH21 : PTXReg<"rh21">;
-def RH22 : PTXReg<"rh22">;
-def RH23 : PTXReg<"rh23">;
-def RH24 : PTXReg<"rh24">;
-def RH25 : PTXReg<"rh25">;
-def RH26 : PTXReg<"rh26">;
-def RH27 : PTXReg<"rh27">;
-def RH28 : PTXReg<"rh28">;
-def RH29 : PTXReg<"rh29">;
-def RH30 : PTXReg<"rh30">;
-def RH31 : PTXReg<"rh31">;
-def RH32 : PTXReg<"rh32">;
-def RH33 : PTXReg<"rh33">;
-def RH34 : PTXReg<"rh34">;
-def RH35 : PTXReg<"rh35">;
-def RH36 : PTXReg<"rh36">;
-def RH37 : PTXReg<"rh37">;
-def RH38 : PTXReg<"rh38">;
-def RH39 : PTXReg<"rh39">;
-def RH40 : PTXReg<"rh40">;
-def RH41 : PTXReg<"rh41">;
-def RH42 : PTXReg<"rh42">;
-def RH43 : PTXReg<"rh43">;
-def RH44 : PTXReg<"rh44">;
-def RH45 : PTXReg<"rh45">;
-def RH46 : PTXReg<"rh46">;
-def RH47 : PTXReg<"rh47">;
-def RH48 : PTXReg<"rh48">;
-def RH49 : PTXReg<"rh49">;
-def RH50 : PTXReg<"rh50">;
-def RH51 : PTXReg<"rh51">;
-def RH52 : PTXReg<"rh52">;
-def RH53 : PTXReg<"rh53">;
-def RH54 : PTXReg<"rh54">;
-def RH55 : PTXReg<"rh55">;
-def RH56 : PTXReg<"rh56">;
-def RH57 : PTXReg<"rh57">;
-def RH58 : PTXReg<"rh58">;
-def RH59 : PTXReg<"rh59">;
-def RH60 : PTXReg<"rh60">;
-def RH61 : PTXReg<"rh61">;
-def RH62 : PTXReg<"rh62">;
-def RH63 : PTXReg<"rh63">;
-def RH64 : PTXReg<"rh64">;
-def RH65 : PTXReg<"rh65">;
-def RH66 : PTXReg<"rh66">;
-def RH67 : PTXReg<"rh67">;
-def RH68 : PTXReg<"rh68">;
-def RH69 : PTXReg<"rh69">;
-def RH70 : PTXReg<"rh70">;
-def RH71 : PTXReg<"rh71">;
-def RH72 : PTXReg<"rh72">;
-def RH73 : PTXReg<"rh73">;
-def RH74 : PTXReg<"rh74">;
-def RH75 : PTXReg<"rh75">;
-def RH76 : PTXReg<"rh76">;
-def RH77 : PTXReg<"rh77">;
-def RH78 : PTXReg<"rh78">;
-def RH79 : PTXReg<"rh79">;
-def RH80 : PTXReg<"rh80">;
-def RH81 : PTXReg<"rh81">;
-def RH82 : PTXReg<"rh82">;
-def RH83 : PTXReg<"rh83">;
-def RH84 : PTXReg<"rh84">;
-def RH85 : PTXReg<"rh85">;
-def RH86 : PTXReg<"rh86">;
-def RH87 : PTXReg<"rh87">;
-def RH88 : PTXReg<"rh88">;
-def RH89 : PTXReg<"rh89">;
-def RH90 : PTXReg<"rh90">;
-def RH91 : PTXReg<"rh91">;
-def RH92 : PTXReg<"rh92">;
-def RH93 : PTXReg<"rh93">;
-def RH94 : PTXReg<"rh94">;
-def RH95 : PTXReg<"rh95">;
-def RH96 : PTXReg<"rh96">;
-def RH97 : PTXReg<"rh97">;
-def RH98 : PTXReg<"rh98">;
-def RH99 : PTXReg<"rh99">;
-def RH100 : PTXReg<"rh100">;
-def RH101 : PTXReg<"rh101">;
-def RH102 : PTXReg<"rh102">;
-def RH103 : PTXReg<"rh103">;
-def RH104 : PTXReg<"rh104">;
-def RH105 : PTXReg<"rh105">;
-def RH106 : PTXReg<"rh106">;
-def RH107 : PTXReg<"rh107">;
-def RH108 : PTXReg<"rh108">;
-def RH109 : PTXReg<"rh109">;
-def RH110 : PTXReg<"rh110">;
-def RH111 : PTXReg<"rh111">;
-def RH112 : PTXReg<"rh112">;
-def RH113 : PTXReg<"rh113">;
-def RH114 : PTXReg<"rh114">;
-def RH115 : PTXReg<"rh115">;
-def RH116 : PTXReg<"rh116">;
-def RH117 : PTXReg<"rh117">;
-def RH118 : PTXReg<"rh118">;
-def RH119 : PTXReg<"rh119">;
-def RH120 : PTXReg<"rh120">;
-def RH121 : PTXReg<"rh121">;
-def RH122 : PTXReg<"rh122">;
-def RH123 : PTXReg<"rh123">;
-def RH124 : PTXReg<"rh124">;
-def RH125 : PTXReg<"rh125">;
-def RH126 : PTXReg<"rh126">;
-def RH127 : PTXReg<"rh127">;
-
-///===- 32-Bit Registers --------------------------------------------------===//
-
-def R0 : PTXReg<"r0">;
-def R1 : PTXReg<"r1">;
-def R2 : PTXReg<"r2">;
-def R3 : PTXReg<"r3">;
-def R4 : PTXReg<"r4">;
-def R5 : PTXReg<"r5">;
-def R6 : PTXReg<"r6">;
-def R7 : PTXReg<"r7">;
-def R8 : PTXReg<"r8">;
-def R9 : PTXReg<"r9">;
-def R10 : PTXReg<"r10">;
-def R11 : PTXReg<"r11">;
-def R12 : PTXReg<"r12">;
-def R13 : PTXReg<"r13">;
-def R14 : PTXReg<"r14">;
-def R15 : PTXReg<"r15">;
-def R16 : PTXReg<"r16">;
-def R17 : PTXReg<"r17">;
-def R18 : PTXReg<"r18">;
-def R19 : PTXReg<"r19">;
-def R20 : PTXReg<"r20">;
-def R21 : PTXReg<"r21">;
-def R22 : PTXReg<"r22">;
-def R23 : PTXReg<"r23">;
-def R24 : PTXReg<"r24">;
-def R25 : PTXReg<"r25">;
-def R26 : PTXReg<"r26">;
-def R27 : PTXReg<"r27">;
-def R28 : PTXReg<"r28">;
-def R29 : PTXReg<"r29">;
-def R30 : PTXReg<"r30">;
-def R31 : PTXReg<"r31">;
-def R32 : PTXReg<"r32">;
-def R33 : PTXReg<"r33">;
-def R34 : PTXReg<"r34">;
-def R35 : PTXReg<"r35">;
-def R36 : PTXReg<"r36">;
-def R37 : PTXReg<"r37">;
-def R38 : PTXReg<"r38">;
-def R39 : PTXReg<"r39">;
-def R40 : PTXReg<"r40">;
-def R41 : PTXReg<"r41">;
-def R42 : PTXReg<"r42">;
-def R43 : PTXReg<"r43">;
-def R44 : PTXReg<"r44">;
-def R45 : PTXReg<"r45">;
-def R46 : PTXReg<"r46">;
-def R47 : PTXReg<"r47">;
-def R48 : PTXReg<"r48">;
-def R49 : PTXReg<"r49">;
-def R50 : PTXReg<"r50">;
-def R51 : PTXReg<"r51">;
-def R52 : PTXReg<"r52">;
-def R53 : PTXReg<"r53">;
-def R54 : PTXReg<"r54">;
-def R55 : PTXReg<"r55">;
-def R56 : PTXReg<"r56">;
-def R57 : PTXReg<"r57">;
-def R58 : PTXReg<"r58">;
-def R59 : PTXReg<"r59">;
-def R60 : PTXReg<"r60">;
-def R61 : PTXReg<"r61">;
-def R62 : PTXReg<"r62">;
-def R63 : PTXReg<"r63">;
-def R64 : PTXReg<"r64">;
-def R65 : PTXReg<"r65">;
-def R66 : PTXReg<"r66">;
-def R67 : PTXReg<"r67">;
-def R68 : PTXReg<"r68">;
-def R69 : PTXReg<"r69">;
-def R70 : PTXReg<"r70">;
-def R71 : PTXReg<"r71">;
-def R72 : PTXReg<"r72">;
-def R73 : PTXReg<"r73">;
-def R74 : PTXReg<"r74">;
-def R75 : PTXReg<"r75">;
-def R76 : PTXReg<"r76">;
-def R77 : PTXReg<"r77">;
-def R78 : PTXReg<"r78">;
-def R79 : PTXReg<"r79">;
-def R80 : PTXReg<"r80">;
-def R81 : PTXReg<"r81">;
-def R82 : PTXReg<"r82">;
-def R83 : PTXReg<"r83">;
-def R84 : PTXReg<"r84">;
-def R85 : PTXReg<"r85">;
-def R86 : PTXReg<"r86">;
-def R87 : PTXReg<"r87">;
-def R88 : PTXReg<"r88">;
-def R89 : PTXReg<"r89">;
-def R90 : PTXReg<"r90">;
-def R91 : PTXReg<"r91">;
-def R92 : PTXReg<"r92">;
-def R93 : PTXReg<"r93">;
-def R94 : PTXReg<"r94">;
-def R95 : PTXReg<"r95">;
-def R96 : PTXReg<"r96">;
-def R97 : PTXReg<"r97">;
-def R98 : PTXReg<"r98">;
-def R99 : PTXReg<"r99">;
-def R100 : PTXReg<"r100">;
-def R101 : PTXReg<"r101">;
-def R102 : PTXReg<"r102">;
-def R103 : PTXReg<"r103">;
-def R104 : PTXReg<"r104">;
-def R105 : PTXReg<"r105">;
-def R106 : PTXReg<"r106">;
-def R107 : PTXReg<"r107">;
-def R108 : PTXReg<"r108">;
-def R109 : PTXReg<"r109">;
-def R110 : PTXReg<"r110">;
-def R111 : PTXReg<"r111">;
-def R112 : PTXReg<"r112">;
-def R113 : PTXReg<"r113">;
-def R114 : PTXReg<"r114">;
-def R115 : PTXReg<"r115">;
-def R116 : PTXReg<"r116">;
-def R117 : PTXReg<"r117">;
-def R118 : PTXReg<"r118">;
-def R119 : PTXReg<"r119">;
-def R120 : PTXReg<"r120">;
-def R121 : PTXReg<"r121">;
-def R122 : PTXReg<"r122">;
-def R123 : PTXReg<"r123">;
-def R124 : PTXReg<"r124">;
-def R125 : PTXReg<"r125">;
-def R126 : PTXReg<"r126">;
-def R127 : PTXReg<"r127">;
-
-///===- 64-Bit Registers --------------------------------------------------===//
-
-def RD0 : PTXReg<"rd0">;
-def RD1 : PTXReg<"rd1">;
-def RD2 : PTXReg<"rd2">;
-def RD3 : PTXReg<"rd3">;
-def RD4 : PTXReg<"rd4">;
-def RD5 : PTXReg<"rd5">;
-def RD6 : PTXReg<"rd6">;
-def RD7 : PTXReg<"rd7">;
-def RD8 : PTXReg<"rd8">;
-def RD9 : PTXReg<"rd9">;
-def RD10 : PTXReg<"rd10">;
-def RD11 : PTXReg<"rd11">;
-def RD12 : PTXReg<"rd12">;
-def RD13 : PTXReg<"rd13">;
-def RD14 : PTXReg<"rd14">;
-def RD15 : PTXReg<"rd15">;
-def RD16 : PTXReg<"rd16">;
-def RD17 : PTXReg<"rd17">;
-def RD18 : PTXReg<"rd18">;
-def RD19 : PTXReg<"rd19">;
-def RD20 : PTXReg<"rd20">;
-def RD21 : PTXReg<"rd21">;
-def RD22 : PTXReg<"rd22">;
-def RD23 : PTXReg<"rd23">;
-def RD24 : PTXReg<"rd24">;
-def RD25 : PTXReg<"rd25">;
-def RD26 : PTXReg<"rd26">;
-def RD27 : PTXReg<"rd27">;
-def RD28 : PTXReg<"rd28">;
-def RD29 : PTXReg<"rd29">;
-def RD30 : PTXReg<"rd30">;
-def RD31 : PTXReg<"rd31">;
-def RD32 : PTXReg<"rd32">;
-def RD33 : PTXReg<"rd33">;
-def RD34 : PTXReg<"rd34">;
-def RD35 : PTXReg<"rd35">;
-def RD36 : PTXReg<"rd36">;
-def RD37 : PTXReg<"rd37">;
-def RD38 : PTXReg<"rd38">;
-def RD39 : PTXReg<"rd39">;
-def RD40 : PTXReg<"rd40">;
-def RD41 : PTXReg<"rd41">;
-def RD42 : PTXReg<"rd42">;
-def RD43 : PTXReg<"rd43">;
-def RD44 : PTXReg<"rd44">;
-def RD45 : PTXReg<"rd45">;
-def RD46 : PTXReg<"rd46">;
-def RD47 : PTXReg<"rd47">;
-def RD48 : PTXReg<"rd48">;
-def RD49 : PTXReg<"rd49">;
-def RD50 : PTXReg<"rd50">;
-def RD51 : PTXReg<"rd51">;
-def RD52 : PTXReg<"rd52">;
-def RD53 : PTXReg<"rd53">;
-def RD54 : PTXReg<"rd54">;
-def RD55 : PTXReg<"rd55">;
-def RD56 : PTXReg<"rd56">;
-def RD57 : PTXReg<"rd57">;
-def RD58 : PTXReg<"rd58">;
-def RD59 : PTXReg<"rd59">;
-def RD60 : PTXReg<"rd60">;
-def RD61 : PTXReg<"rd61">;
-def RD62 : PTXReg<"rd62">;
-def RD63 : PTXReg<"rd63">;
-def RD64 : PTXReg<"rd64">;
-def RD65 : PTXReg<"rd65">;
-def RD66 : PTXReg<"rd66">;
-def RD67 : PTXReg<"rd67">;
-def RD68 : PTXReg<"rd68">;
-def RD69 : PTXReg<"rd69">;
-def RD70 : PTXReg<"rd70">;
-def RD71 : PTXReg<"rd71">;
-def RD72 : PTXReg<"rd72">;
-def RD73 : PTXReg<"rd73">;
-def RD74 : PTXReg<"rd74">;
-def RD75 : PTXReg<"rd75">;
-def RD76 : PTXReg<"rd76">;
-def RD77 : PTXReg<"rd77">;
-def RD78 : PTXReg<"rd78">;
-def RD79 : PTXReg<"rd79">;
-def RD80 : PTXReg<"rd80">;
-def RD81 : PTXReg<"rd81">;
-def RD82 : PTXReg<"rd82">;
-def RD83 : PTXReg<"rd83">;
-def RD84 : PTXReg<"rd84">;
-def RD85 : PTXReg<"rd85">;
-def RD86 : PTXReg<"rd86">;
-def RD87 : PTXReg<"rd87">;
-def RD88 : PTXReg<"rd88">;
-def RD89 : PTXReg<"rd89">;
-def RD90 : PTXReg<"rd90">;
-def RD91 : PTXReg<"rd91">;
-def RD92 : PTXReg<"rd92">;
-def RD93 : PTXReg<"rd93">;
-def RD94 : PTXReg<"rd94">;
-def RD95 : PTXReg<"rd95">;
-def RD96 : PTXReg<"rd96">;
-def RD97 : PTXReg<"rd97">;
-def RD98 : PTXReg<"rd98">;
-def RD99 : PTXReg<"rd99">;
-def RD100 : PTXReg<"rd100">;
-def RD101 : PTXReg<"rd101">;
-def RD102 : PTXReg<"rd102">;
-def RD103 : PTXReg<"rd103">;
-def RD104 : PTXReg<"rd104">;
-def RD105 : PTXReg<"rd105">;
-def RD106 : PTXReg<"rd106">;
-def RD107 : PTXReg<"rd107">;
-def RD108 : PTXReg<"rd108">;
-def RD109 : PTXReg<"rd109">;
-def RD110 : PTXReg<"rd110">;
-def RD111 : PTXReg<"rd111">;
-def RD112 : PTXReg<"rd112">;
-def RD113 : PTXReg<"rd113">;
-def RD114 : PTXReg<"rd114">;
-def RD115 : PTXReg<"rd115">;
-def RD116 : PTXReg<"rd116">;
-def RD117 : PTXReg<"rd117">;
-def RD118 : PTXReg<"rd118">;
-def RD119 : PTXReg<"rd119">;
-def RD120 : PTXReg<"rd120">;
-def RD121 : PTXReg<"rd121">;
-def RD122 : PTXReg<"rd122">;
-def RD123 : PTXReg<"rd123">;
-def RD124 : PTXReg<"rd124">;
-def RD125 : PTXReg<"rd125">;
-def RD126 : PTXReg<"rd126">;
-def RD127 : PTXReg<"rd127">;
+// The generated register info code throws warnings for empty register classes
+// (e.g. zero-length arrays), so we use a dummy register here just to prevent
+// these warnings.
+def DUMMY_REG : PTXReg<"R0">;
//===----------------------------------------------------------------------===//
// Register classes
//===----------------------------------------------------------------------===//
-def RegPred : RegisterClass<"PTX", [i1], 8, (sequence "P%u", 0, 127)>;
-def RegI16 : RegisterClass<"PTX", [i16], 16, (sequence "RH%u", 0, 127)>;
-def RegI32 : RegisterClass<"PTX", [i32], 32, (sequence "R%u", 0, 127)>;
-def RegI64 : RegisterClass<"PTX", [i64], 64, (sequence "RD%u", 0, 127)>;
-def RegF32 : RegisterClass<"PTX", [f32], 32, (sequence "R%u", 0, 127)>;
-def RegF64 : RegisterClass<"PTX", [f64], 64, (sequence "RD%u", 0, 127)>;
+def RegPred : RegisterClass<"PTX", [i1], 8, (add DUMMY_REG)>;
+def RegI16 : RegisterClass<"PTX", [i16], 16, (add DUMMY_REG)>;
+def RegI32 : RegisterClass<"PTX", [i32], 32, (add DUMMY_REG)>;
+def RegI64 : RegisterClass<"PTX", [i64], 64, (add DUMMY_REG)>;
+def RegF32 : RegisterClass<"PTX", [f32], 32, (add DUMMY_REG)>;
+def RegF64 : RegisterClass<"PTX", [f64], 64, (add DUMMY_REG)>;
+
diff --git a/contrib/llvm/lib/Target/PTX/PTXSelectionDAGInfo.cpp b/contrib/llvm/lib/Target/PTX/PTXSelectionDAGInfo.cpp
new file mode 100644
index 0000000..50ef14a
--- /dev/null
+++ b/contrib/llvm/lib/Target/PTX/PTXSelectionDAGInfo.cpp
@@ -0,0 +1,149 @@
+//===-- PTXSelectionDAGInfo.cpp - PTX SelectionDAG Info -------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the PTXSelectionDAGInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "ptx-selectiondag-info"
+#include "PTXTargetMachine.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/CodeGen/SelectionDAG.h"
+using namespace llvm;
+
+PTXSelectionDAGInfo::PTXSelectionDAGInfo(const TargetMachine &TM)
+ : TargetSelectionDAGInfo(TM),
+ Subtarget(&TM.getSubtarget<PTXSubtarget>()) {
+}
+
+PTXSelectionDAGInfo::~PTXSelectionDAGInfo() {
+}
+
+SDValue
+PTXSelectionDAGInfo::EmitTargetCodeForMemcpy(SelectionDAG &DAG, DebugLoc dl,
+ SDValue Chain,
+ SDValue Dst, SDValue Src,
+ SDValue Size, unsigned Align,
+ bool isVolatile, bool AlwaysInline,
+ MachinePointerInfo DstPtrInfo,
+ MachinePointerInfo SrcPtrInfo) const {
+ // Do repeated 4-byte loads and stores. To be improved.
+ // This requires 4-byte alignment.
+ if ((Align & 3) != 0)
+ return SDValue();
+ // This requires the copy size to be a constant, preferably
+ // within a subtarget-specific limit.
+ ConstantSDNode *ConstantSize = dyn_cast<ConstantSDNode>(Size);
+ if (!ConstantSize)
+ return SDValue();
+ uint64_t SizeVal = ConstantSize->getZExtValue();
+ // Always inline memcpys. In PTX, we do not have a C library that provides
+ // a memcpy function.
+ //if (!AlwaysInline)
+ // return SDValue();
+
+ unsigned BytesLeft = SizeVal & 3;
+ unsigned NumMemOps = SizeVal >> 2;
+ unsigned EmittedNumMemOps = 0;
+ EVT VT = MVT::i32;
+ unsigned VTSize = 4;
+ unsigned i = 0;
+ const unsigned MAX_LOADS_IN_LDM = 6;
+ SDValue TFOps[MAX_LOADS_IN_LDM];
+ SDValue Loads[MAX_LOADS_IN_LDM];
+ uint64_t SrcOff = 0, DstOff = 0;
+ EVT PointerType = Subtarget->is64Bit() ? MVT::i64 : MVT::i32;
+
+ // Emit up to MAX_LOADS_IN_LDM loads, then a TokenFactor barrier, then the
+ // same number of stores. The loads and stores will get combined into
+ // ldm/stm later on.
+ while (EmittedNumMemOps < NumMemOps) {
+ for (i = 0;
+ i < MAX_LOADS_IN_LDM && EmittedNumMemOps + i < NumMemOps; ++i) {
+ Loads[i] = DAG.getLoad(VT, dl, Chain,
+ DAG.getNode(ISD::ADD, dl, PointerType, Src,
+ DAG.getConstant(SrcOff, PointerType)),
+ SrcPtrInfo.getWithOffset(SrcOff), isVolatile,
+ false, 0);
+ TFOps[i] = Loads[i].getValue(1);
+ SrcOff += VTSize;
+ }
+ Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, &TFOps[0], i);
+
+ for (i = 0;
+ i < MAX_LOADS_IN_LDM && EmittedNumMemOps + i < NumMemOps; ++i) {
+ TFOps[i] = DAG.getStore(Chain, dl, Loads[i],
+ DAG.getNode(ISD::ADD, dl, PointerType, Dst,
+ DAG.getConstant(DstOff, PointerType)),
+ DstPtrInfo.getWithOffset(DstOff),
+ isVolatile, false, 0);
+ DstOff += VTSize;
+ }
+ Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, &TFOps[0], i);
+
+ EmittedNumMemOps += i;
+ }
+
+ if (BytesLeft == 0)
+ return Chain;
+
+ // Issue loads / stores for the trailing (1 - 3) bytes.
+ unsigned BytesLeftSave = BytesLeft;
+ i = 0;
+ while (BytesLeft) {
+ if (BytesLeft >= 2) {
+ VT = MVT::i16;
+ VTSize = 2;
+ } else {
+ VT = MVT::i8;
+ VTSize = 1;
+ }
+
+ Loads[i] = DAG.getLoad(VT, dl, Chain,
+ DAG.getNode(ISD::ADD, dl, PointerType, Src,
+ DAG.getConstant(SrcOff, PointerType)),
+ SrcPtrInfo.getWithOffset(SrcOff), false, false, 0);
+ TFOps[i] = Loads[i].getValue(1);
+ ++i;
+ SrcOff += VTSize;
+ BytesLeft -= VTSize;
+ }
+ Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, &TFOps[0], i);
+
+ i = 0;
+ BytesLeft = BytesLeftSave;
+ while (BytesLeft) {
+ if (BytesLeft >= 2) {
+ VT = MVT::i16;
+ VTSize = 2;
+ } else {
+ VT = MVT::i8;
+ VTSize = 1;
+ }
+
+ TFOps[i] = DAG.getStore(Chain, dl, Loads[i],
+ DAG.getNode(ISD::ADD, dl, PointerType, Dst,
+ DAG.getConstant(DstOff, PointerType)),
+ DstPtrInfo.getWithOffset(DstOff), false, false, 0);
+ ++i;
+ DstOff += VTSize;
+ BytesLeft -= VTSize;
+ }
+ return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, &TFOps[0], i);
+}
+
+SDValue PTXSelectionDAGInfo::
+EmitTargetCodeForMemset(SelectionDAG &DAG, DebugLoc dl,
+ SDValue Chain, SDValue Dst,
+ SDValue Src, SDValue Size,
+ unsigned Align, bool isVolatile,
+ MachinePointerInfo DstPtrInfo) const {
+ llvm_unreachable("memset lowering not implemented for PTX yet");
+}
+
diff --git a/contrib/llvm/lib/Target/PTX/PTXSelectionDAGInfo.h b/contrib/llvm/lib/Target/PTX/PTXSelectionDAGInfo.h
new file mode 100644
index 0000000..e0c7167
--- /dev/null
+++ b/contrib/llvm/lib/Target/PTX/PTXSelectionDAGInfo.h
@@ -0,0 +1,53 @@
+//===-- PTXSelectionDAGInfo.h - PTX SelectionDAG Info -----------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the PTX subclass for TargetSelectionDAGInfo.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef PTXSELECTIONDAGINFO_H
+#define PTXSELECTIONDAGINFO_H
+
+#include "llvm/Target/TargetSelectionDAGInfo.h"
+
+namespace llvm {
+
+/// PTXSelectionDAGInfo - TargetSelectionDAGInfo sub-class for the PTX target.
+/// At the moment, this is mostly just a copy of ARMSelectionDAGInfo.
+class PTXSelectionDAGInfo : public TargetSelectionDAGInfo {
+ /// Subtarget - Keep a pointer to the PTXSubtarget around so that we can
+ /// make the right decision when generating code for different targets.
+ const PTXSubtarget *Subtarget;
+
+public:
+ explicit PTXSelectionDAGInfo(const TargetMachine &TM);
+ ~PTXSelectionDAGInfo();
+
+ virtual
+ SDValue EmitTargetCodeForMemcpy(SelectionDAG &DAG, DebugLoc dl,
+ SDValue Chain,
+ SDValue Dst, SDValue Src,
+ SDValue Size, unsigned Align,
+ bool isVolatile, bool AlwaysInline,
+ MachinePointerInfo DstPtrInfo,
+ MachinePointerInfo SrcPtrInfo) const;
+
+ virtual
+ SDValue EmitTargetCodeForMemset(SelectionDAG &DAG, DebugLoc dl,
+ SDValue Chain,
+ SDValue Op1, SDValue Op2,
+ SDValue Op3, unsigned Align,
+ bool isVolatile,
+ MachinePointerInfo DstPtrInfo) const;
+};
+
+}
+
+#endif
+
diff --git a/contrib/llvm/lib/Target/PTX/PTXSubtarget.cpp b/contrib/llvm/lib/Target/PTX/PTXSubtarget.cpp
index 8ec646e..1eb57d2 100644
--- a/contrib/llvm/lib/Target/PTX/PTXSubtarget.cpp
+++ b/contrib/llvm/lib/Target/PTX/PTXSubtarget.cpp
@@ -14,7 +14,7 @@
#include "PTXSubtarget.h"
#include "PTX.h"
#include "llvm/Support/ErrorHandling.h"
-#include "llvm/Target/TargetRegistry.h"
+#include "llvm/Support/TargetRegistry.h"
#define GET_SUBTARGETINFO_TARGET_DESC
#define GET_SUBTARGETINFO_CTOR
diff --git a/contrib/llvm/lib/Target/PTX/PTXSubtarget.h b/contrib/llvm/lib/Target/PTX/PTXSubtarget.h
index 0921f1f..b946d7c 100644
--- a/contrib/llvm/lib/Target/PTX/PTXSubtarget.h
+++ b/contrib/llvm/lib/Target/PTX/PTXSubtarget.h
@@ -114,7 +114,16 @@ class StringRef;
(PTXTarget >= PTX_COMPUTE_2_0 && PTXTarget < PTX_LAST_COMPUTE);
}
- void ParseSubtargetFeatures(StringRef CPU, StringRef FS);
+ bool callsAreHandled() const {
+ return (PTXTarget >= PTX_SM_2_0 && PTXTarget < PTX_LAST_SM) ||
+ (PTXTarget >= PTX_COMPUTE_2_0 && PTXTarget < PTX_LAST_COMPUTE);
+ }
+
+ bool emitPtrAttribute() const {
+ return PTXVersion >= PTX_VERSION_2_2;
+ }
+
+ void ParseSubtargetFeatures(StringRef CPU, StringRef FS);
}; // class PTXSubtarget
} // namespace llvm
diff --git a/contrib/llvm/lib/Target/PTX/PTXTargetMachine.cpp b/contrib/llvm/lib/Target/PTX/PTXTargetMachine.cpp
index ab926e0..449a3d9 100644
--- a/contrib/llvm/lib/Target/PTX/PTXTargetMachine.cpp
+++ b/contrib/llvm/lib/Target/PTX/PTXTargetMachine.cpp
@@ -14,8 +14,32 @@
#include "PTX.h"
#include "PTXTargetMachine.h"
#include "llvm/PassManager.h"
-#include "llvm/Target/TargetRegistry.h"
+#include "llvm/Analysis/Passes.h"
+#include "llvm/Analysis/Verifier.h"
+#include "llvm/Assembly/PrintModulePass.h"
+#include "llvm/ADT/OwningPtr.h"
+#include "llvm/CodeGen/AsmPrinter.h"
+#include "llvm/CodeGen/MachineFunctionAnalysis.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/MC/MCInstrInfo.h"
+#include "llvm/MC/MCStreamer.h"
+#include "llvm/MC/MCSubtargetInfo.h"
+#include "llvm/Support/TargetRegistry.h"
#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetLowering.h"
+#include "llvm/Target/TargetLoweringObjectFile.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetOptions.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/Target/TargetSubtargetInfo.h"
+#include "llvm/Transforms/Scalar.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/TargetRegistry.h"
+
using namespace llvm;
@@ -25,7 +49,7 @@ namespace llvm {
bool useCFI,
MCInstPrinter *InstPrint,
MCCodeEmitter *CE,
- TargetAsmBackend *TAB,
+ MCAsmBackend *MAB,
bool ShowInst);
}
@@ -43,34 +67,47 @@ namespace {
"e-p:32:32-i64:32:32-f64:32:32-v128:32:128-v64:32:64-n32:64";
const char* DataLayout64 =
"e-p:64:64-i64:32:32-f64:32:32-v128:32:128-v64:32:64-n32:64";
+
+ // Copied from LLVMTargetMachine.cpp
+ void printNoVerify(PassManagerBase &PM, const char *Banner) {
+ if (PrintMachineCode)
+ PM.add(createMachineFunctionPrinterPass(dbgs(), Banner));
+ }
+
+ void printAndVerify(PassManagerBase &PM,
+ const char *Banner) {
+ if (PrintMachineCode)
+ PM.add(createMachineFunctionPrinterPass(dbgs(), Banner));
+
+ //if (VerifyMachineCode)
+ // PM.add(createMachineVerifierPass(Banner));
+ }
}
// DataLayout and FrameLowering are filled with dummy data
PTXTargetMachine::PTXTargetMachine(const Target &T,
- const std::string &TT,
- const std::string &CPU,
- const std::string &FS,
+ StringRef TT, StringRef CPU, StringRef FS,
+ Reloc::Model RM, CodeModel::Model CM,
bool is64Bit)
- : LLVMTargetMachine(T, TT, CPU, FS),
+ : LLVMTargetMachine(T, TT, CPU, FS, RM, CM),
DataLayout(is64Bit ? DataLayout64 : DataLayout32),
Subtarget(TT, CPU, FS, is64Bit),
FrameLowering(Subtarget),
InstrInfo(*this),
+ TSInfo(*this),
TLInfo(*this) {
}
-PTX32TargetMachine::PTX32TargetMachine(const Target &T,
- const std::string& TT,
- const std::string& CPU,
- const std::string& FS)
- : PTXTargetMachine(T, TT, CPU, FS, false) {
+PTX32TargetMachine::PTX32TargetMachine(const Target &T, StringRef TT,
+ StringRef CPU, StringRef FS,
+ Reloc::Model RM, CodeModel::Model CM)
+ : PTXTargetMachine(T, TT, CPU, FS, RM, CM, false) {
}
-PTX64TargetMachine::PTX64TargetMachine(const Target &T,
- const std::string& TT,
- const std::string& CPU,
- const std::string& FS)
- : PTXTargetMachine(T, TT, CPU, FS, true) {
+PTX64TargetMachine::PTX64TargetMachine(const Target &T, StringRef TT,
+ StringRef CPU, StringRef FS,
+ Reloc::Model RM, CodeModel::Model CM)
+ : PTXTargetMachine(T, TT, CPU, FS, RM, CM, true) {
}
bool PTXTargetMachine::addInstSelector(PassManagerBase &PM,
@@ -82,6 +119,255 @@ bool PTXTargetMachine::addInstSelector(PassManagerBase &PM,
bool PTXTargetMachine::addPostRegAlloc(PassManagerBase &PM,
CodeGenOpt::Level OptLevel) {
// PTXMFInfoExtract must after register allocation!
+ //PM.add(createPTXMFInfoExtract(*this, OptLevel));
+ return false;
+}
+
+bool PTXTargetMachine::addPassesToEmitFile(PassManagerBase &PM,
+ formatted_raw_ostream &Out,
+ CodeGenFileType FileType,
+ CodeGenOpt::Level OptLevel,
+ bool DisableVerify) {
+ // This is mostly based on LLVMTargetMachine::addPassesToEmitFile
+
+ // Add common CodeGen passes.
+ MCContext *Context = 0;
+ if (addCommonCodeGenPasses(PM, OptLevel, DisableVerify, Context))
+ return true;
+ assert(Context != 0 && "Failed to get MCContext");
+
+ if (hasMCSaveTempLabels())
+ Context->setAllowTemporaryLabels(false);
+
+ const MCAsmInfo &MAI = *getMCAsmInfo();
+ const MCSubtargetInfo &STI = getSubtarget<MCSubtargetInfo>();
+ OwningPtr<MCStreamer> AsmStreamer;
+
+ switch (FileType) {
+ default: return true;
+ case CGFT_AssemblyFile: {
+ MCInstPrinter *InstPrinter =
+ getTarget().createMCInstPrinter(MAI.getAssemblerDialect(), MAI, STI);
+
+ // Create a code emitter if asked to show the encoding.
+ MCCodeEmitter *MCE = 0;
+ MCAsmBackend *MAB = 0;
+
+ MCStreamer *S = getTarget().createAsmStreamer(*Context, Out,
+ true, /* verbose asm */
+ hasMCUseLoc(),
+ hasMCUseCFI(),
+ InstPrinter,
+ MCE, MAB,
+ false /* show MC encoding */);
+ AsmStreamer.reset(S);
+ break;
+ }
+ case CGFT_ObjectFile: {
+ llvm_unreachable("Object file emission is not supported with PTX");
+ }
+ case CGFT_Null:
+ // The Null output is intended for use for performance analysis and testing,
+ // not real users.
+ AsmStreamer.reset(createNullStreamer(*Context));
+ break;
+ }
+
+ // MC Logging
+ //AsmStreamer.reset(createLoggingStreamer(AsmStreamer.take(), errs()));
+
+ // Create the AsmPrinter, which takes ownership of AsmStreamer if successful.
+ FunctionPass *Printer = getTarget().createAsmPrinter(*this, *AsmStreamer);
+ if (Printer == 0)
+ return true;
+
+ // If successful, createAsmPrinter took ownership of AsmStreamer.
+ AsmStreamer.take();
+
+ PM.add(Printer);
+
+ PM.add(createGCInfoDeleter());
+ return false;
+}
+
+bool PTXTargetMachine::addCommonCodeGenPasses(PassManagerBase &PM,
+ CodeGenOpt::Level OptLevel,
+ bool DisableVerify,
+ MCContext *&OutContext) {
+ // Add standard LLVM codegen passes.
+ // This is derived from LLVMTargetMachine::addCommonCodeGenPasses, with some
+ // modifications for the PTX target.
+
+ // Standard LLVM-Level Passes.
+
+ // Basic AliasAnalysis support.
+ // Add TypeBasedAliasAnalysis before BasicAliasAnalysis so that
+ // BasicAliasAnalysis wins if they disagree. This is intended to help
+ // support "obvious" type-punning idioms.
+ PM.add(createTypeBasedAliasAnalysisPass());
+ PM.add(createBasicAliasAnalysisPass());
+
+ // Before running any passes, run the verifier to determine if the input
+ // coming from the front-end and/or optimizer is valid.
+ if (!DisableVerify)
+ PM.add(createVerifierPass());
+
+ // Run loop strength reduction before anything else.
+ if (OptLevel != CodeGenOpt::None) {
+ PM.add(createLoopStrengthReducePass(getTargetLowering()));
+ //PM.add(createPrintFunctionPass("\n\n*** Code after LSR ***\n", &dbgs()));
+ }
+
+ PM.add(createGCLoweringPass());
+
+ // Make sure that no unreachable blocks are instruction selected.
+ PM.add(createUnreachableBlockEliminationPass());
+
+ PM.add(createLowerInvokePass(getTargetLowering()));
+ // The lower invoke pass may create unreachable code. Remove it.
+ PM.add(createUnreachableBlockEliminationPass());
+
+ if (OptLevel != CodeGenOpt::None)
+ PM.add(createCodeGenPreparePass(getTargetLowering()));
+
+ PM.add(createStackProtectorPass(getTargetLowering()));
+
+ addPreISel(PM, OptLevel);
+
+ //PM.add(createPrintFunctionPass("\n\n"
+ // "*** Final LLVM Code input to ISel ***\n",
+ // &dbgs()));
+
+ // All passes which modify the LLVM IR are now complete; run the verifier
+ // to ensure that the IR is valid.
+ if (!DisableVerify)
+ PM.add(createVerifierPass());
+
+ // Standard Lower-Level Passes.
+
+ // Install a MachineModuleInfo class, which is an immutable pass that holds
+ // all the per-module stuff we're generating, including MCContext.
+ MachineModuleInfo *MMI = new MachineModuleInfo(*getMCAsmInfo(),
+ *getRegisterInfo(),
+ &getTargetLowering()->getObjFileLowering());
+ PM.add(MMI);
+ OutContext = &MMI->getContext(); // Return the MCContext specifically by-ref.
+
+ // Set up a MachineFunction for the rest of CodeGen to work on.
+ PM.add(new MachineFunctionAnalysis(*this, OptLevel));
+
+ // Ask the target for an isel.
+ if (addInstSelector(PM, OptLevel))
+ return true;
+
+ // Print the instruction selected machine code...
+ printAndVerify(PM, "After Instruction Selection");
+
+ // Expand pseudo-instructions emitted by ISel.
+ PM.add(createExpandISelPseudosPass());
+
+ // Pre-ra tail duplication.
+ if (OptLevel != CodeGenOpt::None) {
+ PM.add(createTailDuplicatePass(true));
+ printAndVerify(PM, "After Pre-RegAlloc TailDuplicate");
+ }
+
+ // Optimize PHIs before DCE: removing dead PHI cycles may make more
+ // instructions dead.
+ if (OptLevel != CodeGenOpt::None)
+ PM.add(createOptimizePHIsPass());
+
+ // If the target requests it, assign local variables to stack slots relative
+ // to one another and simplify frame index references where possible.
+ PM.add(createLocalStackSlotAllocationPass());
+
+ if (OptLevel != CodeGenOpt::None) {
+ // With optimization, dead code should already be eliminated. However
+ // there is one known exception: lowered code for arguments that are only
+ // used by tail calls, where the tail calls reuse the incoming stack
+ // arguments directly (see t11 in test/CodeGen/X86/sibcall.ll).
+ PM.add(createDeadMachineInstructionElimPass());
+ printAndVerify(PM, "After codegen DCE pass");
+
+ PM.add(createMachineLICMPass());
+ PM.add(createMachineCSEPass());
+ PM.add(createMachineSinkingPass());
+ printAndVerify(PM, "After Machine LICM, CSE and Sinking passes");
+
+ PM.add(createPeepholeOptimizerPass());
+ printAndVerify(PM, "After codegen peephole optimization pass");
+ }
+
+ // Run pre-ra passes.
+ if (addPreRegAlloc(PM, OptLevel))
+ printAndVerify(PM, "After PreRegAlloc passes");
+
+ // Perform register allocation.
+ PM.add(createPTXRegisterAllocator());
+ printAndVerify(PM, "After Register Allocation");
+
+ // Perform stack slot coloring and post-ra machine LICM.
+ if (OptLevel != CodeGenOpt::None) {
+ // FIXME: Re-enable coloring with register when it's capable of adding
+ // kill markers.
+ PM.add(createStackSlotColoringPass(false));
+
+ // FIXME: Post-RA LICM has asserts that fire on virtual registers.
+ // Run post-ra machine LICM to hoist reloads / remats.
+ //if (!DisablePostRAMachineLICM)
+ // PM.add(createMachineLICMPass(false));
+
+ printAndVerify(PM, "After StackSlotColoring and postra Machine LICM");
+ }
+
+ // Run post-ra passes.
+ if (addPostRegAlloc(PM, OptLevel))
+ printAndVerify(PM, "After PostRegAlloc passes");
+
+ PM.add(createExpandPostRAPseudosPass());
+ printAndVerify(PM, "After ExpandPostRAPseudos");
+
+ // Insert prolog/epilog code. Eliminate abstract frame index references...
+ PM.add(createPrologEpilogCodeInserter());
+ printAndVerify(PM, "After PrologEpilogCodeInserter");
+
+ // Run pre-sched2 passes.
+ if (addPreSched2(PM, OptLevel))
+ printAndVerify(PM, "After PreSched2 passes");
+
+ // Second pass scheduler.
+ if (OptLevel != CodeGenOpt::None) {
+ PM.add(createPostRAScheduler(OptLevel));
+ printAndVerify(PM, "After PostRAScheduler");
+ }
+
+ // Branch folding must be run after regalloc and prolog/epilog insertion.
+ if (OptLevel != CodeGenOpt::None) {
+ PM.add(createBranchFoldingPass(getEnableTailMergeDefault()));
+ printNoVerify(PM, "After BranchFolding");
+ }
+
+ // Tail duplication.
+ if (OptLevel != CodeGenOpt::None) {
+ PM.add(createTailDuplicatePass(false));
+ printNoVerify(PM, "After TailDuplicate");
+ }
+
+ PM.add(createGCMachineCodeAnalysisPass());
+
+ //if (PrintGCInfo)
+ // PM.add(createGCInfoPrinter(dbgs()));
+
+ if (OptLevel != CodeGenOpt::None) {
+ PM.add(createCodePlacementOptPass());
+ printNoVerify(PM, "After CodePlacementOpt");
+ }
+
+ if (addPreEmitPass(PM, OptLevel))
+ printNoVerify(PM, "After PreEmit passes");
+
PM.add(createPTXMFInfoExtract(*this, OptLevel));
+ PM.add(createPTXFPRoundingModePass(*this, OptLevel));
+
return false;
}
diff --git a/contrib/llvm/lib/Target/PTX/PTXTargetMachine.h b/contrib/llvm/lib/Target/PTX/PTXTargetMachine.h
index ae42153..5b7c82b 100644
--- a/contrib/llvm/lib/Target/PTX/PTXTargetMachine.h
+++ b/contrib/llvm/lib/Target/PTX/PTXTargetMachine.h
@@ -17,6 +17,7 @@
#include "PTXISelLowering.h"
#include "PTXInstrInfo.h"
#include "PTXFrameLowering.h"
+#include "PTXSelectionDAGInfo.h"
#include "PTXSubtarget.h"
#include "llvm/Target/TargetData.h"
#include "llvm/Target/TargetFrameLowering.h"
@@ -25,15 +26,17 @@
namespace llvm {
class PTXTargetMachine : public LLVMTargetMachine {
private:
- const TargetData DataLayout;
- PTXSubtarget Subtarget; // has to be initialized before FrameLowering
- PTXFrameLowering FrameLowering;
- PTXInstrInfo InstrInfo;
- PTXTargetLowering TLInfo;
+ const TargetData DataLayout;
+ PTXSubtarget Subtarget; // has to be initialized before FrameLowering
+ PTXFrameLowering FrameLowering;
+ PTXInstrInfo InstrInfo;
+ PTXSelectionDAGInfo TSInfo;
+ PTXTargetLowering TLInfo;
public:
- PTXTargetMachine(const Target &T, const std::string &TT,
- const std::string &CPU, const std::string &FS,
+ PTXTargetMachine(const Target &T, StringRef TT,
+ StringRef CPU, StringRef FS,
+ Reloc::Model RM, CodeModel::Model CM,
bool is64Bit);
virtual const TargetData *getTargetData() const { return &DataLayout; }
@@ -49,27 +52,62 @@ class PTXTargetMachine : public LLVMTargetMachine {
virtual const PTXTargetLowering *getTargetLowering() const {
return &TLInfo; }
+ virtual const PTXSelectionDAGInfo* getSelectionDAGInfo() const {
+ return &TSInfo;
+ }
+
virtual const PTXSubtarget *getSubtargetImpl() const { return &Subtarget; }
virtual bool addInstSelector(PassManagerBase &PM,
CodeGenOpt::Level OptLevel);
virtual bool addPostRegAlloc(PassManagerBase &PM,
CodeGenOpt::Level OptLevel);
+
+ // We override this method to supply our own set of codegen passes.
+ virtual bool addPassesToEmitFile(PassManagerBase &,
+ formatted_raw_ostream &,
+ CodeGenFileType,
+ CodeGenOpt::Level,
+ bool = true);
+
+ // Emission of machine code through JITCodeEmitter is not supported.
+ virtual bool addPassesToEmitMachineCode(PassManagerBase &,
+ JITCodeEmitter &,
+ CodeGenOpt::Level,
+ bool = true) {
+ return true;
+ }
+
+ // Emission of machine code through MCJIT is not supported.
+ virtual bool addPassesToEmitMC(PassManagerBase &,
+ MCContext *&,
+ raw_ostream &,
+ CodeGenOpt::Level,
+ bool = true) {
+ return true;
+ }
+
+ private:
+
+ bool addCommonCodeGenPasses(PassManagerBase &, CodeGenOpt::Level,
+ bool DisableVerify, MCContext *&OutCtx);
}; // class PTXTargetMachine
class PTX32TargetMachine : public PTXTargetMachine {
public:
- PTX32TargetMachine(const Target &T, const std::string &TT,
- const std::string& CPU, const std::string& FS);
+ PTX32TargetMachine(const Target &T, StringRef TT,
+ StringRef CPU, StringRef FS,
+ Reloc::Model RM, CodeModel::Model CM);
}; // class PTX32TargetMachine
class PTX64TargetMachine : public PTXTargetMachine {
public:
- PTX64TargetMachine(const Target &T, const std::string &TT,
- const std::string& CPU, const std::string& FS);
+ PTX64TargetMachine(const Target &T, StringRef TT,
+ StringRef CPU, StringRef FS,
+ Reloc::Model RM, CodeModel::Model CM);
}; // class PTX32TargetMachine
} // namespace llvm
diff --git a/contrib/llvm/lib/Target/PTX/TargetInfo/CMakeLists.txt b/contrib/llvm/lib/Target/PTX/TargetInfo/CMakeLists.txt
deleted file mode 100644
index 4b09cf5..0000000
--- a/contrib/llvm/lib/Target/PTX/TargetInfo/CMakeLists.txt
+++ /dev/null
@@ -1,7 +0,0 @@
-include_directories( ${CMAKE_CURRENT_BINARY_DIR}/.. ${CMAKE_CURRENT_SOURCE_DIR}/.. )
-
-add_llvm_library(LLVMPTXInfo
- PTXTargetInfo.cpp
- )
-
-add_dependencies(LLVMPTXInfo PTXCodeGenTable_gen)
diff --git a/contrib/llvm/lib/Target/PTX/TargetInfo/Makefile b/contrib/llvm/lib/Target/PTX/TargetInfo/Makefile
deleted file mode 100644
index 8619785..0000000
--- a/contrib/llvm/lib/Target/PTX/TargetInfo/Makefile
+++ /dev/null
@@ -1,15 +0,0 @@
-##===- lib/Target/PTX/TargetInfo/Makefile ------------------*- Makefile -*-===##
-#
-# The LLVM Compiler Infrastructure
-#
-# This file is distributed under the University of Illinois Open Source
-# License. See LICENSE.TXT for details.
-#
-##===----------------------------------------------------------------------===##
-LEVEL = ../../../..
-LIBRARYNAME = LLVMPTXInfo
-
-# Hack: we need to include 'main' target directory to grab private headers
-CPPFLAGS = -I$(PROJ_OBJ_DIR)/.. -I$(PROJ_SRC_DIR)/..
-
-include $(LEVEL)/Makefile.common
diff --git a/contrib/llvm/lib/Target/PTX/TargetInfo/PTXTargetInfo.cpp b/contrib/llvm/lib/Target/PTX/TargetInfo/PTXTargetInfo.cpp
index 9df6c75..09a2735 100644
--- a/contrib/llvm/lib/Target/PTX/TargetInfo/PTXTargetInfo.cpp
+++ b/contrib/llvm/lib/Target/PTX/TargetInfo/PTXTargetInfo.cpp
@@ -9,7 +9,7 @@
#include "PTX.h"
#include "llvm/Module.h"
-#include "llvm/Target/TargetRegistry.h"
+#include "llvm/Support/TargetRegistry.h"
using namespace llvm;
diff --git a/contrib/llvm/lib/Target/PTX/generate-register-td.py b/contrib/llvm/lib/Target/PTX/generate-register-td.py
deleted file mode 100755
index 1528690..0000000
--- a/contrib/llvm/lib/Target/PTX/generate-register-td.py
+++ /dev/null
@@ -1,163 +0,0 @@
-#!/usr/bin/env python
-##===- generate-register-td.py --------------------------------*-python-*--===##
-##
-## The LLVM Compiler Infrastructure
-##
-## This file is distributed under the University of Illinois Open Source
-## License. See LICENSE.TXT for details.
-##
-##===----------------------------------------------------------------------===##
-##
-## This file describes the PTX register file generator.
-##
-##===----------------------------------------------------------------------===##
-
-from sys import argv, exit, stdout
-
-
-if len(argv) != 5:
- print('Usage: generate-register-td.py <num_preds> <num_16> <num_32> <num_64>')
- exit(1)
-
-try:
- num_pred = int(argv[1])
- num_16bit = int(argv[2])
- num_32bit = int(argv[3])
- num_64bit = int(argv[4])
-except:
- print('ERROR: Invalid integer parameter')
- exit(1)
-
-## Print the register definition file
-td_file = open('PTXRegisterInfo.td', 'w')
-
-td_file.write('''
-//===- PTXRegisterInfo.td - PTX Register defs ----------------*- tblgen -*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-
-//===----------------------------------------------------------------------===//
-// Declarations that describe the PTX register file
-//===----------------------------------------------------------------------===//
-
-class PTXReg<string n> : Register<n> {
- let Namespace = "PTX";
-}
-
-//===----------------------------------------------------------------------===//
-// Registers
-//===----------------------------------------------------------------------===//
-''')
-
-
-# Print predicate registers
-td_file.write('\n///===- Predicate Registers -----------------------------------------------===//\n\n')
-for r in range(0, num_pred):
- td_file.write('def P%d : PTXReg<"p%d">;\n' % (r, r))
-
-# Print 16-bit registers
-td_file.write('\n///===- 16-Bit Registers --------------------------------------------------===//\n\n')
-for r in range(0, num_16bit):
- td_file.write('def RH%d : PTXReg<"rh%d">;\n' % (r, r))
-
-# Print 32-bit registers
-td_file.write('\n///===- 32-Bit Registers --------------------------------------------------===//\n\n')
-for r in range(0, num_32bit):
- td_file.write('def R%d : PTXReg<"r%d">;\n' % (r, r))
-
-# Print 64-bit registers
-td_file.write('\n///===- 64-Bit Registers --------------------------------------------------===//\n\n')
-for r in range(0, num_64bit):
- td_file.write('def RD%d : PTXReg<"rd%d">;\n' % (r, r))
-
-
-td_file.write('''
-//===----------------------------------------------------------------------===//
-// Register classes
-//===----------------------------------------------------------------------===//
-''')
-
-
-# Print register classes
-
-td_file.write('def RegPred : RegisterClass<"PTX", [i1], 8, (sequence "P%%u", 0, %d)>;\n' % (num_pred-1))
-td_file.write('def RegI16 : RegisterClass<"PTX", [i16], 16, (sequence "RH%%u", 0, %d)>;\n' % (num_16bit-1))
-td_file.write('def RegI32 : RegisterClass<"PTX", [i32], 32, (sequence "R%%u", 0, %d)>;\n' % (num_32bit-1))
-td_file.write('def RegI64 : RegisterClass<"PTX", [i64], 64, (sequence "RD%%u", 0, %d)>;\n' % (num_64bit-1))
-td_file.write('def RegF32 : RegisterClass<"PTX", [f32], 32, (sequence "R%%u", 0, %d)>;\n' % (num_32bit-1))
-td_file.write('def RegF64 : RegisterClass<"PTX", [f64], 64, (sequence "RD%%u", 0, %d)>;\n' % (num_64bit-1))
-
-
-td_file.close()
-
-## Now write the PTXCallingConv.td file
-td_file = open('PTXCallingConv.td', 'w')
-
-# Reserve 10% of the available registers for return values, and the other 90%
-# for parameters
-num_ret_pred = int(0.1 * num_pred)
-num_ret_16bit = int(0.1 * num_16bit)
-num_ret_32bit = int(0.1 * num_32bit)
-num_ret_64bit = int(0.1 * num_64bit)
-num_param_pred = num_pred - num_ret_pred
-num_param_16bit = num_16bit - num_ret_16bit
-num_param_32bit = num_32bit - num_ret_32bit
-num_param_64bit = num_64bit - num_ret_64bit
-
-param_regs_pred = [('P%d' % (i+num_ret_pred)) for i in range(0, num_param_pred)]
-ret_regs_pred = ['P%d' % i for i in range(0, num_ret_pred)]
-param_regs_16bit = [('RH%d' % (i+num_ret_16bit)) for i in range(0, num_param_16bit)]
-ret_regs_16bit = ['RH%d' % i for i in range(0, num_ret_16bit)]
-param_regs_32bit = [('R%d' % (i+num_ret_32bit)) for i in range(0, num_param_32bit)]
-ret_regs_32bit = ['R%d' % i for i in range(0, num_ret_32bit)]
-param_regs_64bit = [('RD%d' % (i+num_ret_64bit)) for i in range(0, num_param_64bit)]
-ret_regs_64bit = ['RD%d' % i for i in range(0, num_ret_64bit)]
-
-param_list_pred = reduce(lambda x, y: '%s, %s' % (x, y), param_regs_pred)
-ret_list_pred = reduce(lambda x, y: '%s, %s' % (x, y), ret_regs_pred)
-param_list_16bit = reduce(lambda x, y: '%s, %s' % (x, y), param_regs_16bit)
-ret_list_16bit = reduce(lambda x, y: '%s, %s' % (x, y), ret_regs_16bit)
-param_list_32bit = reduce(lambda x, y: '%s, %s' % (x, y), param_regs_32bit)
-ret_list_32bit = reduce(lambda x, y: '%s, %s' % (x, y), ret_regs_32bit)
-param_list_64bit = reduce(lambda x, y: '%s, %s' % (x, y), param_regs_64bit)
-ret_list_64bit = reduce(lambda x, y: '%s, %s' % (x, y), ret_regs_64bit)
-
-td_file.write('''
-//===--- PTXCallingConv.td - Calling Conventions -----------*- tablegen -*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This describes the calling conventions for the PTX architecture.
-//
-//===----------------------------------------------------------------------===//
-
-// PTX Formal Parameter Calling Convention
-def CC_PTX : CallingConv<[
- CCIfType<[i1], CCAssignToReg<[%s]>>,
- CCIfType<[i16], CCAssignToReg<[%s]>>,
- CCIfType<[i32,f32], CCAssignToReg<[%s]>>,
- CCIfType<[i64,f64], CCAssignToReg<[%s]>>
-]>;
-
-// PTX Return Value Calling Convention
-def RetCC_PTX : CallingConv<[
- CCIfType<[i1], CCAssignToReg<[%s]>>,
- CCIfType<[i16], CCAssignToReg<[%s]>>,
- CCIfType<[i32,f32], CCAssignToReg<[%s]>>,
- CCIfType<[i64,f64], CCAssignToReg<[%s]>>
-]>;
-''' % (param_list_pred, param_list_16bit, param_list_32bit, param_list_64bit,
- ret_list_pred, ret_list_16bit, ret_list_32bit, ret_list_64bit))
-
-
-td_file.close()
diff --git a/contrib/llvm/lib/Target/PowerPC/InstPrinter/CMakeLists.txt b/contrib/llvm/lib/Target/PowerPC/InstPrinter/CMakeLists.txt
deleted file mode 100644
index 389ea77..0000000
--- a/contrib/llvm/lib/Target/PowerPC/InstPrinter/CMakeLists.txt
+++ /dev/null
@@ -1,6 +0,0 @@
-include_directories( ${CMAKE_CURRENT_BINARY_DIR}/.. ${CMAKE_CURRENT_SOURCE_DIR}/.. )
-
-add_llvm_library(LLVMPowerPCAsmPrinter
- PPCInstPrinter.cpp
- )
-add_dependencies(LLVMPowerPCAsmPrinter PowerPCCodeGenTable_gen)
diff --git a/contrib/llvm/lib/Target/PowerPC/InstPrinter/Makefile b/contrib/llvm/lib/Target/PowerPC/InstPrinter/Makefile
deleted file mode 100644
index f097e84..0000000
--- a/contrib/llvm/lib/Target/PowerPC/InstPrinter/Makefile
+++ /dev/null
@@ -1,16 +0,0 @@
-##===- lib/Target/PowerPC/AsmPrinter/Makefile --------------*- Makefile -*-===##
-#
-# The LLVM Compiler Infrastructure
-#
-# This file is distributed under the University of Illinois Open Source
-# License. See LICENSE.TXT for details.
-#
-##===----------------------------------------------------------------------===##
-
-LEVEL = ../../../..
-LIBRARYNAME = LLVMPowerPCAsmPrinter
-
-# Hack: we need to include 'main' powerpc target directory to grab private headers
-CPP.Flags += -I$(PROJ_OBJ_DIR)/.. -I$(PROJ_SRC_DIR)/..
-
-include $(LEVEL)/Makefile.common
diff --git a/contrib/llvm/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.cpp b/contrib/llvm/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.cpp
index 1a9bd76..b6a0835 100644
--- a/contrib/llvm/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.cpp
+++ b/contrib/llvm/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.cpp
@@ -13,7 +13,8 @@
#define DEBUG_TYPE "asm-printer"
#include "PPCInstPrinter.h"
-#include "PPCPredicates.h"
+#include "MCTargetDesc/PPCBaseInfo.h"
+#include "MCTargetDesc/PPCPredicates.h"
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCInst.h"
#include "llvm/Support/raw_ostream.h"
@@ -30,7 +31,8 @@ void PPCInstPrinter::printRegName(raw_ostream &OS, unsigned RegNo) const {
OS << getRegisterName(RegNo);
}
-void PPCInstPrinter::printInst(const MCInst *MI, raw_ostream &O) {
+void PPCInstPrinter::printInst(const MCInst *MI, raw_ostream &O,
+ StringRef Annot) {
// Check for slwi/srwi mnemonics.
if (MI->getOpcode() == PPC::RLWINM) {
unsigned char SH = MI->getOperand(2).getImm();
@@ -49,6 +51,8 @@ void PPCInstPrinter::printInst(const MCInst *MI, raw_ostream &O) {
O << ", ";
printOperand(MI, 1, O);
O << ", " << (unsigned int)SH;
+
+ printAnnotation(O, Annot);
return;
}
}
@@ -59,6 +63,7 @@ void PPCInstPrinter::printInst(const MCInst *MI, raw_ostream &O) {
printOperand(MI, 0, O);
O << ", ";
printOperand(MI, 1, O);
+ printAnnotation(O, Annot);
return;
}
@@ -72,11 +77,13 @@ void PPCInstPrinter::printInst(const MCInst *MI, raw_ostream &O) {
O << ", ";
printOperand(MI, 1, O);
O << ", " << (unsigned int)SH;
+ printAnnotation(O, Annot);
return;
}
}
printInstruction(MI, O);
+ printAnnotation(O, Annot);
}
diff --git a/contrib/llvm/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.h b/contrib/llvm/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.h
index d022a44..4ed4b76 100644
--- a/contrib/llvm/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.h
+++ b/contrib/llvm/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.h
@@ -32,7 +32,7 @@ public:
}
virtual void printRegName(raw_ostream &OS, unsigned RegNo) const;
- virtual void printInst(const MCInst *MI, raw_ostream &O);
+ virtual void printInst(const MCInst *MI, raw_ostream &O, StringRef Annot);
virtual StringRef getOpcodeName(unsigned Opcode) const;
static const char *getInstructionName(unsigned Opcode);
diff --git a/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/CMakeLists.txt b/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/CMakeLists.txt
deleted file mode 100644
index a1b8166..0000000
--- a/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/CMakeLists.txt
+++ /dev/null
@@ -1,4 +0,0 @@
-add_llvm_library(LLVMPowerPCDesc
- PPCMCTargetDesc.cpp
- PPCMCAsmInfo.cpp
- )
diff --git a/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/Makefile b/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/Makefile
deleted file mode 100644
index 9db6662..0000000
--- a/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/Makefile
+++ /dev/null
@@ -1,16 +0,0 @@
-##===- lib/Target/PowerPC/TargetDesc/Makefile --------------*- Makefile -*-===##
-#
-# The LLVM Compiler Infrastructure
-#
-# This file is distributed under the University of Illinois Open Source
-# License. See LICENSE.TXT for details.
-#
-##===----------------------------------------------------------------------===##
-
-LEVEL = ../../../..
-LIBRARYNAME = LLVMPowerPCDesc
-
-# Hack: we need to include 'main' target directory to grab private headers
-CPP.Flags += -I$(PROJ_OBJ_DIR)/.. -I$(PROJ_SRC_DIR)/..
-
-include $(LEVEL)/Makefile.common
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCAsmBackend.cpp b/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCAsmBackend.cpp
index 4b8cbb7..9f2fd6d 100644
--- a/contrib/llvm/lib/Target/PowerPC/PPCAsmBackend.cpp
+++ b/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCAsmBackend.cpp
@@ -7,17 +7,43 @@
//
//===----------------------------------------------------------------------===//
-#include "llvm/Target/TargetAsmBackend.h"
-#include "PPC.h"
-#include "PPCFixupKinds.h"
+#include "llvm/MC/MCAsmBackend.h"
+#include "MCTargetDesc/PPCMCTargetDesc.h"
+#include "MCTargetDesc/PPCFixupKinds.h"
+#include "llvm/MC/MCELFObjectWriter.h"
#include "llvm/MC/MCMachObjectWriter.h"
#include "llvm/MC/MCSectionMachO.h"
#include "llvm/MC/MCObjectWriter.h"
#include "llvm/MC/MCValue.h"
#include "llvm/Object/MachOFormat.h"
-#include "llvm/Target/TargetRegistry.h"
+#include "llvm/Support/ELF.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/TargetRegistry.h"
using namespace llvm;
+static unsigned adjustFixupValue(unsigned Kind, uint64_t Value) {
+ switch (Kind) {
+ default:
+ llvm_unreachable("Unknown fixup kind!");
+ case FK_Data_1:
+ case FK_Data_2:
+ case FK_Data_4:
+ return Value;
+ case PPC::fixup_ppc_brcond14:
+ return Value & 0x3ffc;
+ case PPC::fixup_ppc_br24:
+ return Value & 0x3fffffc;
+#if 0
+ case PPC::fixup_ppc_hi16:
+ return (Value >> 16) & 0xffff;
+#endif
+ case PPC::fixup_ppc_ha16:
+ return ((Value >> 16) + ((Value & 0x8000) ? 1 : 0)) & 0xffff;
+ case PPC::fixup_ppc_lo16:
+ return Value & 0xffff;
+ }
+}
+
namespace {
class PPCMachObjectWriter : public MCMachObjectTargetWriter {
public:
@@ -31,10 +57,17 @@ public:
MCValue Target, uint64_t &FixedValue) {}
};
-class PPCAsmBackend : public TargetAsmBackend {
+class PPCELFObjectWriter : public MCELFObjectTargetWriter {
+public:
+ PPCELFObjectWriter(bool Is64Bit, Triple::OSType OSType, uint16_t EMachine,
+ bool HasRelocationAddend, bool isLittleEndian)
+ : MCELFObjectTargetWriter(Is64Bit, OSType, EMachine, HasRelocationAddend) {}
+};
+
+class PPCAsmBackend : public MCAsmBackend {
const Target &TheTarget;
public:
- PPCAsmBackend(const Target &T) : TargetAsmBackend(), TheTarget(T) {}
+ PPCAsmBackend(const Target &T) : MCAsmBackend(), TheTarget(T) {}
unsigned getNumFixupKinds() const { return PPC::NumTargetFixupKinds; }
@@ -49,7 +82,7 @@ public:
};
if (Kind < FirstTargetFixupKind)
- return TargetAsmBackend::getFixupKindInfo(Kind);
+ return MCAsmBackend::getFixupKindInfo(Kind);
assert(unsigned(Kind - FirstTargetFixupKind) < getNumFixupKinds() &&
"Invalid kind!");
@@ -109,15 +142,50 @@ namespace {
return false;
}
};
+
+ class ELFPPCAsmBackend : public PPCAsmBackend {
+ Triple::OSType OSType;
+ public:
+ ELFPPCAsmBackend(const Target &T, Triple::OSType OSType) :
+ PPCAsmBackend(T), OSType(OSType) { }
+
+ void ApplyFixup(const MCFixup &Fixup, char *Data, unsigned DataSize,
+ uint64_t Value) const {
+ Value = adjustFixupValue(Fixup.getKind(), Value);
+ if (!Value) return; // Doesn't change encoding.
+
+ unsigned Offset = Fixup.getOffset();
+
+ // For each byte of the fragment that the fixup touches, mask in the bits from
+ // the fixup value. The Value has been "split up" into the appropriate
+ // bitfields above.
+ for (unsigned i = 0; i != 4; ++i)
+ Data[Offset + i] |= uint8_t((Value >> ((4 - i - 1)*8)) & 0xff);
+ }
+
+ MCObjectWriter *createObjectWriter(raw_ostream &OS) const {
+ bool is64 = getPointerSize() == 8;
+ return createELFObjectWriter(new PPCELFObjectWriter(
+ /*Is64Bit=*/is64,
+ OSType,
+ is64 ? ELF::EM_PPC64 : ELF::EM_PPC,
+ /*addend*/ true, /*isLittleEndian*/ false),
+ OS, /*IsLittleEndian=*/false);
+ }
+
+ virtual bool doesSectionRequireSymbols(const MCSection &Section) const {
+ return false;
+ }
+ };
+
} // end anonymous namespace
-TargetAsmBackend *llvm::createPPCAsmBackend(const Target &T,
- const std::string &TT) {
+MCAsmBackend *llvm::createPPCAsmBackend(const Target &T, StringRef TT) {
if (Triple(TT).isOSDarwin())
return new DarwinPPCAsmBackend(T);
- return 0;
+ return new ELFPPCAsmBackend(T, Triple(TT).getOS());
}
diff --git a/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCBaseInfo.h b/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCBaseInfo.h
new file mode 100644
index 0000000..369bbdc
--- /dev/null
+++ b/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCBaseInfo.h
@@ -0,0 +1,70 @@
+//===-- PPCBaseInfo.h - Top level definitions for PPC -------- --*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains small standalone helper functions and enum definitions for
+// the PPC target useful for the compiler back-end and the MC libraries.
+// As such, it deliberately does not include references to LLVM core
+// code gen types, passes, etc..
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef PPCBASEINFO_H
+#define PPCBASEINFO_H
+
+#include "PPCMCTargetDesc.h"
+#include "llvm/Support/ErrorHandling.h"
+
+namespace llvm {
+
+/// getPPCRegisterNumbering - Given the enum value for some register, e.g.
+/// PPC::F14, return the number that it corresponds to (e.g. 14).
+inline static unsigned getPPCRegisterNumbering(unsigned RegEnum) {
+ using namespace PPC;
+ switch (RegEnum) {
+ case 0: return 0;
+ case R0 : case X0 : case F0 : case V0 : case CR0: case CR0LT: return 0;
+ case R1 : case X1 : case F1 : case V1 : case CR1: case CR0GT: return 1;
+ case R2 : case X2 : case F2 : case V2 : case CR2: case CR0EQ: return 2;
+ case R3 : case X3 : case F3 : case V3 : case CR3: case CR0UN: return 3;
+ case R4 : case X4 : case F4 : case V4 : case CR4: case CR1LT: return 4;
+ case R5 : case X5 : case F5 : case V5 : case CR5: case CR1GT: return 5;
+ case R6 : case X6 : case F6 : case V6 : case CR6: case CR1EQ: return 6;
+ case R7 : case X7 : case F7 : case V7 : case CR7: case CR1UN: return 7;
+ case R8 : case X8 : case F8 : case V8 : case CR2LT: return 8;
+ case R9 : case X9 : case F9 : case V9 : case CR2GT: return 9;
+ case R10: case X10: case F10: case V10: case CR2EQ: return 10;
+ case R11: case X11: case F11: case V11: case CR2UN: return 11;
+ case R12: case X12: case F12: case V12: case CR3LT: return 12;
+ case R13: case X13: case F13: case V13: case CR3GT: return 13;
+ case R14: case X14: case F14: case V14: case CR3EQ: return 14;
+ case R15: case X15: case F15: case V15: case CR3UN: return 15;
+ case R16: case X16: case F16: case V16: case CR4LT: return 16;
+ case R17: case X17: case F17: case V17: case CR4GT: return 17;
+ case R18: case X18: case F18: case V18: case CR4EQ: return 18;
+ case R19: case X19: case F19: case V19: case CR4UN: return 19;
+ case R20: case X20: case F20: case V20: case CR5LT: return 20;
+ case R21: case X21: case F21: case V21: case CR5GT: return 21;
+ case R22: case X22: case F22: case V22: case CR5EQ: return 22;
+ case R23: case X23: case F23: case V23: case CR5UN: return 23;
+ case R24: case X24: case F24: case V24: case CR6LT: return 24;
+ case R25: case X25: case F25: case V25: case CR6GT: return 25;
+ case R26: case X26: case F26: case V26: case CR6EQ: return 26;
+ case R27: case X27: case F27: case V27: case CR6UN: return 27;
+ case R28: case X28: case F28: case V28: case CR7LT: return 28;
+ case R29: case X29: case F29: case V29: case CR7GT: return 29;
+ case R30: case X30: case F30: case V30: case CR7EQ: return 30;
+ case R31: case X31: case F31: case V31: case CR7UN: return 31;
+ default:
+ llvm_unreachable("Unhandled reg in PPCRegisterInfo::getRegisterNumbering!");
+ }
+}
+
+} // end namespace llvm;
+
+#endif
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCFixupKinds.h b/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCFixupKinds.h
index b3c889e..b3c889e 100644
--- a/contrib/llvm/lib/Target/PowerPC/PPCFixupKinds.h
+++ b/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCFixupKinds.h
diff --git a/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCAsmInfo.cpp b/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCAsmInfo.cpp
index b6dca83..e9424d8 100644
--- a/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCAsmInfo.cpp
+++ b/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCAsmInfo.cpp
@@ -31,6 +31,10 @@ PPCMCAsmInfoDarwin::PPCMCAsmInfoDarwin(bool is64Bit) {
}
PPCLinuxMCAsmInfo::PPCLinuxMCAsmInfo(bool is64Bit) {
+ if (is64Bit)
+ PointerSize = 8;
+ IsLittleEndian = false;
+
// ".comm align is in bytes but .align is pow-2."
AlignmentIsInBytes = false;
@@ -56,7 +60,7 @@ PPCLinuxMCAsmInfo::PPCLinuxMCAsmInfo(bool is64Bit) {
ZeroDirective = "\t.space\t";
Data64bitsDirective = is64Bit ? "\t.quad\t" : 0;
- HasLCOMMDirective = true;
+ LCOMMDirectiveType = LCOMM::NoAlignment;
AssemblerDialect = 0; // Old-Style mnemonics.
}
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCMCCodeEmitter.cpp b/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCCodeEmitter.cpp
index cf73d86..262f97c3 100644
--- a/contrib/llvm/lib/Target/PowerPC/PPCMCCodeEmitter.cpp
+++ b/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCCodeEmitter.cpp
@@ -12,9 +12,8 @@
//===----------------------------------------------------------------------===//
#define DEBUG_TYPE "mccodeemitter"
-#include "PPC.h"
-#include "PPCRegisterInfo.h"
-#include "PPCFixupKinds.h"
+#include "MCTargetDesc/PPCBaseInfo.h"
+#include "MCTargetDesc/PPCFixupKinds.h"
#include "llvm/MC/MCCodeEmitter.h"
#include "llvm/MC/MCInst.h"
#include "llvm/ADT/Statistic.h"
@@ -170,7 +169,7 @@ get_crbitm_encoding(const MCInst &MI, unsigned OpNo,
const MCOperand &MO = MI.getOperand(OpNo);
assert((MI.getOpcode() == PPC::MTCRF || MI.getOpcode() == PPC::MFOCRF) &&
(MO.getReg() >= PPC::CR0 && MO.getReg() <= PPC::CR7));
- return 0x80 >> PPCRegisterInfo::getRegisterNumbering(MO.getReg());
+ return 0x80 >> getPPCRegisterNumbering(MO.getReg());
}
@@ -182,7 +181,7 @@ getMachineOpValue(const MCInst &MI, const MCOperand &MO,
// The GPR operand should come through here though.
assert((MI.getOpcode() != PPC::MTCRF && MI.getOpcode() != PPC::MFOCRF) ||
MO.getReg() < PPC::CR0 || MO.getReg() > PPC::CR7);
- return PPCRegisterInfo::getRegisterNumbering(MO.getReg());
+ return getPPCRegisterNumbering(MO.getReg());
}
assert(MO.isImm() &&
diff --git a/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.cpp b/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.cpp
index 02b887f..d5c8a9e 100644
--- a/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.cpp
+++ b/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.cpp
@@ -13,10 +13,14 @@
#include "PPCMCTargetDesc.h"
#include "PPCMCAsmInfo.h"
+#include "InstPrinter/PPCInstPrinter.h"
+#include "llvm/MC/MachineLocation.h"
+#include "llvm/MC/MCCodeGenInfo.h"
#include "llvm/MC/MCInstrInfo.h"
#include "llvm/MC/MCRegisterInfo.h"
+#include "llvm/MC/MCStreamer.h"
#include "llvm/MC/MCSubtargetInfo.h"
-#include "llvm/Target/TargetRegistry.h"
+#include "llvm/Support/TargetRegistry.h"
#define GET_INSTRINFO_MC_DESC
#include "PPCGenInstrInfo.inc"
@@ -35,11 +39,16 @@ static MCInstrInfo *createPPCMCInstrInfo() {
return X;
}
-extern "C" void LLVMInitializePowerPCMCInstrInfo() {
- TargetRegistry::RegisterMCInstrInfo(ThePPC32Target, createPPCMCInstrInfo);
- TargetRegistry::RegisterMCInstrInfo(ThePPC64Target, createPPCMCInstrInfo);
-}
+static MCRegisterInfo *createPPCMCRegisterInfo(StringRef TT) {
+ Triple TheTriple(TT);
+ bool isPPC64 = (TheTriple.getArch() == Triple::ppc64);
+ unsigned Flavour = isPPC64 ? 0 : 1;
+ unsigned RA = isPPC64 ? PPC::LR8 : PPC::LR;
+ MCRegisterInfo *X = new MCRegisterInfo();
+ InitPPCMCRegisterInfo(X, RA, Flavour, Flavour);
+ return X;
+}
static MCSubtargetInfo *createPPCMCSubtargetInfo(StringRef TT, StringRef CPU,
StringRef FS) {
@@ -48,23 +57,95 @@ static MCSubtargetInfo *createPPCMCSubtargetInfo(StringRef TT, StringRef CPU,
return X;
}
-extern "C" void LLVMInitializePowerPCMCSubtargetInfo() {
+static MCAsmInfo *createPPCMCAsmInfo(const Target &T, StringRef TT) {
+ Triple TheTriple(TT);
+ bool isPPC64 = TheTriple.getArch() == Triple::ppc64;
+
+ MCAsmInfo *MAI;
+ if (TheTriple.isOSDarwin())
+ MAI = new PPCMCAsmInfoDarwin(isPPC64);
+ else
+ MAI = new PPCLinuxMCAsmInfo(isPPC64);
+
+ // Initial state of the frame pointer is R1.
+ MachineLocation Dst(MachineLocation::VirtualFP);
+ MachineLocation Src(PPC::R1, 0);
+ MAI->addInitialFrameState(0, Dst, Src);
+
+ return MAI;
+}
+
+static MCCodeGenInfo *createPPCMCCodeGenInfo(StringRef TT, Reloc::Model RM,
+ CodeModel::Model CM) {
+ MCCodeGenInfo *X = new MCCodeGenInfo();
+
+ if (RM == Reloc::Default) {
+ Triple T(TT);
+ if (T.isOSDarwin())
+ RM = Reloc::DynamicNoPIC;
+ else
+ RM = Reloc::Static;
+ }
+ X->InitMCCodeGenInfo(RM, CM);
+ return X;
+}
+
+// This is duplicated code. Refactor this.
+static MCStreamer *createMCStreamer(const Target &T, StringRef TT,
+ MCContext &Ctx, MCAsmBackend &MAB,
+ raw_ostream &OS,
+ MCCodeEmitter *Emitter,
+ bool RelaxAll,
+ bool NoExecStack) {
+ if (Triple(TT).isOSDarwin())
+ return createMachOStreamer(Ctx, MAB, OS, Emitter, RelaxAll);
+
+ return createELFStreamer(Ctx, MAB, OS, Emitter, RelaxAll, NoExecStack);
+}
+
+static MCInstPrinter *createPPCMCInstPrinter(const Target &T,
+ unsigned SyntaxVariant,
+ const MCAsmInfo &MAI,
+ const MCSubtargetInfo &STI) {
+ return new PPCInstPrinter(MAI, SyntaxVariant);
+}
+
+extern "C" void LLVMInitializePowerPCTargetMC() {
+ // Register the MC asm info.
+ RegisterMCAsmInfoFn C(ThePPC32Target, createPPCMCAsmInfo);
+ RegisterMCAsmInfoFn D(ThePPC64Target, createPPCMCAsmInfo);
+
+ // Register the MC codegen info.
+ TargetRegistry::RegisterMCCodeGenInfo(ThePPC32Target, createPPCMCCodeGenInfo);
+ TargetRegistry::RegisterMCCodeGenInfo(ThePPC64Target, createPPCMCCodeGenInfo);
+
+ // Register the MC instruction info.
+ TargetRegistry::RegisterMCInstrInfo(ThePPC32Target, createPPCMCInstrInfo);
+ TargetRegistry::RegisterMCInstrInfo(ThePPC64Target, createPPCMCInstrInfo);
+
+ // Register the MC register info.
+ TargetRegistry::RegisterMCRegInfo(ThePPC32Target, createPPCMCRegisterInfo);
+ TargetRegistry::RegisterMCRegInfo(ThePPC64Target, createPPCMCRegisterInfo);
+
+ // Register the MC subtarget info.
TargetRegistry::RegisterMCSubtargetInfo(ThePPC32Target,
createPPCMCSubtargetInfo);
TargetRegistry::RegisterMCSubtargetInfo(ThePPC64Target,
createPPCMCSubtargetInfo);
-}
-static MCAsmInfo *createMCAsmInfo(const Target &T, StringRef TT) {
- Triple TheTriple(TT);
- bool isPPC64 = TheTriple.getArch() == Triple::ppc64;
- if (TheTriple.isOSDarwin())
- return new PPCMCAsmInfoDarwin(isPPC64);
- return new PPCLinuxMCAsmInfo(isPPC64);
+ // Register the MC Code Emitter
+ TargetRegistry::RegisterMCCodeEmitter(ThePPC32Target, createPPCMCCodeEmitter);
+ TargetRegistry::RegisterMCCodeEmitter(ThePPC64Target, createPPCMCCodeEmitter);
-}
+ // Register the asm backend.
+ TargetRegistry::RegisterMCAsmBackend(ThePPC32Target, createPPCAsmBackend);
+ TargetRegistry::RegisterMCAsmBackend(ThePPC64Target, createPPCAsmBackend);
+
+ // Register the object streamer.
+ TargetRegistry::RegisterMCObjectStreamer(ThePPC32Target, createMCStreamer);
+ TargetRegistry::RegisterMCObjectStreamer(ThePPC64Target, createMCStreamer);
-extern "C" void LLVMInitializePowerPCMCAsmInfo() {
- RegisterMCAsmInfoFn C(ThePPC32Target, createMCAsmInfo);
- RegisterMCAsmInfoFn D(ThePPC64Target, createMCAsmInfo);
+ // Register the MCInstPrinter.
+ TargetRegistry::RegisterMCInstPrinter(ThePPC32Target, createPPCMCInstPrinter);
+ TargetRegistry::RegisterMCInstPrinter(ThePPC64Target, createPPCMCInstPrinter);
}
diff --git a/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.h b/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.h
index cee2350..e5bf2a9 100644
--- a/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.h
+++ b/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.h
@@ -15,6 +15,10 @@
#define PPCMCTARGETDESC_H
namespace llvm {
+class MCAsmBackend;
+class MCCodeEmitter;
+class MCContext;
+class MCInstrInfo;
class MCSubtargetInfo;
class Target;
class StringRef;
@@ -22,6 +26,12 @@ class StringRef;
extern Target ThePPC32Target;
extern Target ThePPC64Target;
+MCCodeEmitter *createPPCMCCodeEmitter(const MCInstrInfo &MCII,
+ const MCSubtargetInfo &STI,
+ MCContext &Ctx);
+
+MCAsmBackend *createPPCAsmBackend(const Target &T, StringRef TT);
+
} // End llvm namespace
// Defines symbolic names for PowerPC registers. This defines a mapping from
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCPredicates.cpp b/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCPredicates.cpp
index 12bb0a1..12bb0a1 100644
--- a/contrib/llvm/lib/Target/PowerPC/PPCPredicates.cpp
+++ b/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCPredicates.cpp
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCPredicates.h b/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCPredicates.h
index b2c8315..f872e86 100644
--- a/contrib/llvm/lib/Target/PowerPC/PPCPredicates.h
+++ b/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCPredicates.h
@@ -14,8 +14,6 @@
#ifndef LLVM_TARGET_POWERPC_PPCPREDICATES_H
#define LLVM_TARGET_POWERPC_PPCPREDICATES_H
-#include "PPC.h"
-
namespace llvm {
namespace PPC {
/// Predicate - These are "(BI << 5) | BO" for various predicates.
diff --git a/contrib/llvm/lib/Target/PowerPC/PPC.h b/contrib/llvm/lib/Target/PowerPC/PPC.h
index 7191dd1..5dc1863 100644
--- a/contrib/llvm/lib/Target/PowerPC/PPC.h
+++ b/contrib/llvm/lib/Target/PowerPC/PPC.h
@@ -15,6 +15,7 @@
#ifndef LLVM_TARGET_POWERPC_H
#define LLVM_TARGET_POWERPC_H
+#include "MCTargetDesc/PPCBaseInfo.h"
#include "MCTargetDesc/PPCMCTargetDesc.h"
#include <string>
@@ -30,22 +31,12 @@ namespace llvm {
class MachineInstr;
class AsmPrinter;
class MCInst;
- class MCCodeEmitter;
- class MCContext;
- class MCInstrInfo;
- class MCSubtargetInfo;
class TargetMachine;
- class TargetAsmBackend;
FunctionPass *createPPCBranchSelectionPass();
FunctionPass *createPPCISelDag(PPCTargetMachine &TM);
FunctionPass *createPPCJITCodeEmitterPass(PPCTargetMachine &TM,
JITCodeEmitter &MCE);
- MCCodeEmitter *createPPCMCCodeEmitter(const MCInstrInfo &MCII,
- const MCSubtargetInfo &STI,
- MCContext &Ctx);
- TargetAsmBackend *createPPCAsmBackend(const Target &, const std::string &);
-
void LowerPPCMachineInstrToMCInst(const MachineInstr *MI, MCInst &OutMI,
AsmPrinter &AP, bool isDarwin);
diff --git a/contrib/llvm/lib/Target/PowerPC/PPC.td b/contrib/llvm/lib/Target/PowerPC/PPC.td
index aabf494..2d5d302 100644
--- a/contrib/llvm/lib/Target/PowerPC/PPC.td
+++ b/contrib/llvm/lib/Target/PowerPC/PPC.td
@@ -43,9 +43,9 @@ def FeatureAltivec : SubtargetFeature<"altivec","HasAltivec", "true",
def FeatureGPUL : SubtargetFeature<"gpul","IsGigaProcessor", "true",
"Enable GPUL instructions">;
def FeatureFSqrt : SubtargetFeature<"fsqrt","HasFSQRT", "true",
- "Enable the fsqrt instruction">;
+ "Enable the fsqrt instruction">;
def FeatureSTFIWX : SubtargetFeature<"stfiwx","HasSTFIWX", "true",
- "Enable the stfiwx instruction">;
+ "Enable the stfiwx instruction">;
//===----------------------------------------------------------------------===//
// Register File Description
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp b/contrib/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp
index 9de2200..9528459 100644
--- a/contrib/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp
+++ b/contrib/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp
@@ -18,9 +18,9 @@
#define DEBUG_TYPE "asmprinter"
#include "PPC.h"
-#include "PPCPredicates.h"
#include "PPCTargetMachine.h"
#include "PPCSubtarget.h"
+#include "MCTargetDesc/PPCPredicates.h"
#include "llvm/Analysis/DebugInfo.h"
#include "llvm/Constants.h"
#include "llvm/DerivedTypes.h"
@@ -43,11 +43,11 @@
#include "llvm/Target/TargetRegisterInfo.h"
#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetOptions.h"
-#include "llvm/Target/TargetRegistry.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/TargetRegistry.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/ADT/StringSet.h"
@@ -679,18 +679,8 @@ static AsmPrinter *createPPCAsmPrinterPass(TargetMachine &tm,
return new PPCLinuxAsmPrinter(tm, Streamer);
}
-static MCInstPrinter *createPPCMCInstPrinter(const Target &T,
- unsigned SyntaxVariant,
- const MCAsmInfo &MAI) {
- return new PPCInstPrinter(MAI, SyntaxVariant);
-}
-
-
// Force static initialization.
extern "C" void LLVMInitializePowerPCAsmPrinter() {
TargetRegistry::RegisterAsmPrinter(ThePPC32Target, createPPCAsmPrinterPass);
TargetRegistry::RegisterAsmPrinter(ThePPC64Target, createPPCAsmPrinterPass);
-
- TargetRegistry::RegisterMCInstPrinter(ThePPC32Target, createPPCMCInstPrinter);
- TargetRegistry::RegisterMCInstPrinter(ThePPC64Target, createPPCMCInstPrinter);
}
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCBranchSelector.cpp b/contrib/llvm/lib/Target/PowerPC/PPCBranchSelector.cpp
index e161d23..475edf3 100644
--- a/contrib/llvm/lib/Target/PowerPC/PPCBranchSelector.cpp
+++ b/contrib/llvm/lib/Target/PowerPC/PPCBranchSelector.cpp
@@ -19,7 +19,7 @@
#include "PPC.h"
#include "PPCInstrBuilder.h"
#include "PPCInstrInfo.h"
-#include "PPCPredicates.h"
+#include "MCTargetDesc/PPCPredicates.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/ADT/Statistic.h"
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCCodeEmitter.cpp b/contrib/llvm/lib/Target/PowerPC/PPCCodeEmitter.cpp
index 42232a0..4a1f182 100644
--- a/contrib/llvm/lib/Target/PowerPC/PPCCodeEmitter.cpp
+++ b/contrib/llvm/lib/Target/PowerPC/PPCCodeEmitter.cpp
@@ -140,7 +140,7 @@ unsigned PPCCodeEmitter::get_crbitm_encoding(const MachineInstr &MI,
const MachineOperand &MO = MI.getOperand(OpNo);
assert((MI.getOpcode() == PPC::MTCRF || MI.getOpcode() == PPC::MFOCRF) &&
(MO.getReg() >= PPC::CR0 && MO.getReg() <= PPC::CR7));
- return 0x80 >> PPCRegisterInfo::getRegisterNumbering(MO.getReg());
+ return 0x80 >> getPPCRegisterNumbering(MO.getReg());
}
MachineRelocation PPCCodeEmitter::GetRelocation(const MachineOperand &MO,
@@ -250,7 +250,7 @@ unsigned PPCCodeEmitter::getMachineOpValue(const MachineInstr &MI,
// The GPR operand should come through here though.
assert((MI.getOpcode() != PPC::MTCRF && MI.getOpcode() != PPC::MFOCRF) ||
MO.getReg() < PPC::CR0 || MO.getReg() > PPC::CR7);
- return PPCRegisterInfo::getRegisterNumbering(MO.getReg());
+ return getPPCRegisterNumbering(MO.getReg());
}
assert(MO.isImm() &&
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCFrameLowering.cpp b/contrib/llvm/lib/Target/PowerPC/PPCFrameLowering.cpp
index 375e000..7dead10 100644
--- a/contrib/llvm/lib/Target/PowerPC/PPCFrameLowering.cpp
+++ b/contrib/llvm/lib/Target/PowerPC/PPCFrameLowering.cpp
@@ -109,14 +109,14 @@ static void HandleVRSaveUpdate(MachineInstr *MI, const TargetInstrInfo &TII) {
for (MachineRegisterInfo::livein_iterator
I = MF->getRegInfo().livein_begin(),
E = MF->getRegInfo().livein_end(); I != E; ++I) {
- unsigned RegNo = PPCRegisterInfo::getRegisterNumbering(I->first);
+ unsigned RegNo = getPPCRegisterNumbering(I->first);
if (VRRegNo[RegNo] == I->first) // If this really is a vector reg.
UsedRegMask &= ~(1 << (31-RegNo)); // Doesn't need to be marked.
}
for (MachineRegisterInfo::liveout_iterator
I = MF->getRegInfo().liveout_begin(),
E = MF->getRegInfo().liveout_end(); I != E; ++I) {
- unsigned RegNo = PPCRegisterInfo::getRegisterNumbering(*I);
+ unsigned RegNo = getPPCRegisterNumbering(*I);
if (VRRegNo[RegNo] == *I) // If this really is a vector reg.
UsedRegMask &= ~(1 << (31-RegNo)); // Doesn't need to be marked.
}
@@ -712,13 +712,6 @@ void PPCFrameLowering::emitEpilogue(MachineFunction &MF,
}
}
-void PPCFrameLowering::getInitialFrameState(std::vector<MachineMove> &Moves) const {
- // Initial state of the frame pointer is R1.
- MachineLocation Dst(MachineLocation::VirtualFP);
- MachineLocation Src(PPC::R1, 0);
- Moves.push_back(MachineMove(0, Dst, Src));
-}
-
static bool spillsCR(const MachineFunction &MF) {
const PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
return FuncInfo->isCRSpilled();
@@ -885,7 +878,7 @@ void PPCFrameLowering::processFunctionBeforeFrameFinalized(MachineFunction &MF)
FFI->setObjectOffset(FI, LowerBound + FFI->getObjectOffset(FI));
}
- LowerBound -= (31 - PPCRegisterInfo::getRegisterNumbering(MinFPR) + 1) * 8;
+ LowerBound -= (31 - getPPCRegisterNumbering(MinFPR) + 1) * 8;
}
// Check whether the frame pointer register is allocated. If so, make sure it
@@ -919,8 +912,8 @@ void PPCFrameLowering::processFunctionBeforeFrameFinalized(MachineFunction &MF)
}
unsigned MinReg =
- std::min<unsigned>(PPCRegisterInfo::getRegisterNumbering(MinGPR),
- PPCRegisterInfo::getRegisterNumbering(MinG8R));
+ std::min<unsigned>(getPPCRegisterNumbering(MinGPR),
+ getPPCRegisterNumbering(MinG8R));
if (Subtarget.isPPC64()) {
LowerBound -= (31 - MinReg + 1) * 8;
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCFrameLowering.h b/contrib/llvm/lib/Target/PowerPC/PPCFrameLowering.h
index 0c18de1..20faa71 100644
--- a/contrib/llvm/lib/Target/PowerPC/PPCFrameLowering.h
+++ b/contrib/llvm/lib/Target/PowerPC/PPCFrameLowering.h
@@ -40,7 +40,6 @@ public:
bool hasFP(const MachineFunction &MF) const;
bool needsFP(const MachineFunction &MF) const;
- void getInitialFrameState(std::vector<MachineMove> &Moves) const;
void processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
RegScavenger *RS = NULL) const;
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp b/contrib/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
index 2176c02..6f204cc 100644
--- a/contrib/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
+++ b/contrib/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
@@ -14,8 +14,8 @@
#define DEBUG_TYPE "ppc-codegen"
#include "PPC.h"
-#include "PPCPredicates.h"
#include "PPCTargetMachine.h"
+#include "MCTargetDesc/PPCPredicates.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineFunctionAnalysis.h"
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/contrib/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
index 9741a39..d6b8a9e 100644
--- a/contrib/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/contrib/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -14,8 +14,8 @@
#include "PPCISelLowering.h"
#include "PPCMachineFunctionInfo.h"
#include "PPCPerfectShuffle.h"
-#include "PPCPredicates.h"
#include "PPCTargetMachine.h"
+#include "MCTargetDesc/PPCPredicates.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/VectorExtras.h"
#include "llvm/CodeGen/CallingConvLower.h"
@@ -211,7 +211,8 @@ PPCTargetLowering::PPCTargetLowering(PPCTargetMachine &TM)
setOperationAction(ISD::TRAP, MVT::Other, Legal);
// TRAMPOLINE is custom lowered.
- setOperationAction(ISD::TRAMPOLINE, MVT::Other, Custom);
+ setOperationAction(ISD::INIT_TRAMPOLINE, MVT::Other, Custom);
+ setOperationAction(ISD::ADJUST_TRAMPOLINE, MVT::Other, Custom);
// VASTART needs to be custom lowered to use the VarArgsFrameIndex
setOperationAction(ISD::VASTART , MVT::Other, Custom);
@@ -365,7 +366,11 @@ PPCTargetLowering::PPCTargetLowering(PPCTargetMachine &TM)
setOperationAction(ISD::BUILD_VECTOR, MVT::v4f32, Custom);
}
+ setOperationAction(ISD::ATOMIC_LOAD, MVT::i32, Expand);
+ setOperationAction(ISD::ATOMIC_STORE, MVT::i32, Expand);
+
setBooleanContents(ZeroOrOneBooleanContent);
+ setBooleanVectorContents(ZeroOrOneBooleanContent); // FIXME: Is this correct?
if (TM.getSubtarget<PPCSubtarget>().isPPC64()) {
setStackPointerRegisterToSaveRestore(PPC::X1);
@@ -401,12 +406,14 @@ PPCTargetLowering::PPCTargetLowering(PPCTargetMachine &TM)
if (PPCSubTarget.isDarwin())
setPrefFunctionAlignment(4);
+ setInsertFencesForAtomic(true);
+
computeRegisterProperties();
}
/// getByValTypeAlignment - Return the desired alignment for ByVal aggregate
/// function arguments in the caller parameter area.
-unsigned PPCTargetLowering::getByValTypeAlignment(const Type *Ty) const {
+unsigned PPCTargetLowering::getByValTypeAlignment(Type *Ty) const {
const TargetMachine &TM = getTargetMachine();
// Darwin passes everything on 4 byte boundary.
if (TM.getSubtarget<PPCSubtarget>().isDarwin())
@@ -463,7 +470,7 @@ const char *PPCTargetLowering::getTargetNodeName(unsigned Opcode) const {
}
}
-MVT::SimpleValueType PPCTargetLowering::getSetCCResultType(EVT VT) const {
+EVT PPCTargetLowering::getSetCCResultType(EVT VT) const {
return MVT::i32;
}
@@ -1368,8 +1375,13 @@ SDValue PPCTargetLowering::LowerVAARG(SDValue Op, SelectionDAG &DAG,
return DAG.getLoad(VT, dl, InChain, Result, MachinePointerInfo(), false, false, 0);
}
-SDValue PPCTargetLowering::LowerTRAMPOLINE(SDValue Op,
- SelectionDAG &DAG) const {
+SDValue PPCTargetLowering::LowerADJUST_TRAMPOLINE(SDValue Op,
+ SelectionDAG &DAG) const {
+ return Op.getOperand(0);
+}
+
+SDValue PPCTargetLowering::LowerINIT_TRAMPOLINE(SDValue Op,
+ SelectionDAG &DAG) const {
SDValue Chain = Op.getOperand(0);
SDValue Trmp = Op.getOperand(1); // trampoline
SDValue FPtr = Op.getOperand(2); // nested function
@@ -1378,7 +1390,7 @@ SDValue PPCTargetLowering::LowerTRAMPOLINE(SDValue Op,
EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
bool isPPC64 = (PtrVT == MVT::i64);
- const Type *IntPtrTy =
+ Type *IntPtrTy =
DAG.getTargetLoweringInfo().getTargetData()->getIntPtrType(
*DAG.getContext());
@@ -1398,16 +1410,13 @@ SDValue PPCTargetLowering::LowerTRAMPOLINE(SDValue Op,
// Lower to a call to __trampoline_setup(Trmp, TrampSize, FPtr, ctx_reg)
std::pair<SDValue, SDValue> CallResult =
- LowerCallTo(Chain, Op.getValueType().getTypeForEVT(*DAG.getContext()),
+ LowerCallTo(Chain, Type::getVoidTy(*DAG.getContext()),
false, false, false, false, 0, CallingConv::C, false,
/*isReturnValueUsed=*/true,
DAG.getExternalSymbol("__trampoline_setup", PtrVT),
Args, DAG, dl);
- SDValue Ops[] =
- { CallResult.first, CallResult.second };
-
- return DAG.getMergeValues(Ops, 2, dl);
+ return CallResult.second;
}
SDValue PPCTargetLowering::LowerVASTART(SDValue Op, SelectionDAG &DAG,
@@ -2550,7 +2559,7 @@ unsigned PrepareCall(SelectionDAG &DAG, SDValue &Callee, SDValue &InFlag,
if (!DAG.getTarget().getSubtarget<PPCSubtarget>().isJITCodeModel()) {
unsigned OpFlags = 0;
if (DAG.getTarget().getRelocationModel() != Reloc::Static &&
- (!PPCSubTarget.getTargetTriple().isMacOSX() ||
+ (PPCSubTarget.getTargetTriple().isMacOSX() &&
PPCSubTarget.getTargetTriple().isMacOSXVersionLT(10, 5)) &&
(G->getGlobal()->isDeclaration() ||
G->getGlobal()->isWeakForLinker())) {
@@ -2574,7 +2583,7 @@ unsigned PrepareCall(SelectionDAG &DAG, SDValue &Callee, SDValue &InFlag,
unsigned char OpFlags = 0;
if (DAG.getTarget().getRelocationModel() != Reloc::Static &&
- (!PPCSubTarget.getTargetTriple().isMacOSX() ||
+ (PPCSubTarget.getTargetTriple().isMacOSX() &&
PPCSubTarget.getTargetTriple().isMacOSXVersionLT(10, 5))) {
// PC-relative references to external symbols should go through $stub,
// unless we're building with the leopard linker or later, which
@@ -2941,6 +2950,7 @@ PPCTargetLowering::LowerCall_SVR4(SDValue Chain, SDValue Callee,
SmallVector<TailCallArgumentInfo, 8> TailCallArguments;
SmallVector<SDValue, 8> MemOpChains;
+ bool seenFloatArg = false;
// Walk the register/memloc assignments, inserting copies/loads.
for (unsigned i = 0, j = 0, e = ArgLocs.size();
i != e;
@@ -2985,6 +2995,7 @@ PPCTargetLowering::LowerCall_SVR4(SDValue Chain, SDValue Callee,
}
if (VA.isRegLoc()) {
+ seenFloatArg |= VA.getLocVT().isFloatingPoint();
// Put argument in a physical register.
RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
} else {
@@ -3011,9 +3022,11 @@ PPCTargetLowering::LowerCall_SVR4(SDValue Chain, SDValue Callee,
Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
&MemOpChains[0], MemOpChains.size());
- // Set CR6 to true if this is a vararg call.
+ // Set CR6 to true if this is a vararg call with floating args passed in
+ // registers.
if (isVarArg) {
- SDValue SetCR(DAG.getMachineNode(PPC::CRSET, dl, MVT::i32), 0);
+ SDValue SetCR(DAG.getMachineNode(seenFloatArg ? PPC::CRSET : PPC::CRUNSET,
+ dl, MVT::i32), 0);
RegsToPass.push_back(std::make_pair(unsigned(PPC::CR1EQ), SetCR));
}
@@ -3403,6 +3416,17 @@ PPCTargetLowering::LowerCall_Darwin(SDValue Chain, SDValue Callee,
Ins, InVals);
}
+bool
+PPCTargetLowering::CanLowerReturn(CallingConv::ID CallConv,
+ MachineFunction &MF, bool isVarArg,
+ const SmallVectorImpl<ISD::OutputArg> &Outs,
+ LLVMContext &Context) const {
+ SmallVector<CCValAssign, 16> RVLocs;
+ CCState CCInfo(CallConv, isVarArg, MF, getTargetMachine(),
+ RVLocs, Context);
+ return CCInfo.CheckReturn(Outs, RetCC_PPC);
+}
+
SDValue
PPCTargetLowering::LowerReturn(SDValue Chain,
CallingConv::ID CallConv, bool isVarArg,
@@ -4490,7 +4514,8 @@ SDValue PPCTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
case ISD::GlobalTLSAddress: llvm_unreachable("TLS not implemented for PPC");
case ISD::JumpTable: return LowerJumpTable(Op, DAG);
case ISD::SETCC: return LowerSETCC(Op, DAG);
- case ISD::TRAMPOLINE: return LowerTRAMPOLINE(Op, DAG);
+ case ISD::INIT_TRAMPOLINE: return LowerINIT_TRAMPOLINE(Op, DAG);
+ case ISD::ADJUST_TRAMPOLINE: return LowerADJUST_TRAMPOLINE(Op, DAG);
case ISD::VASTART:
return LowerVASTART(Op, DAG, PPCSubTarget);
@@ -5504,7 +5529,7 @@ PPCTargetLowering::getSingleConstraintMatchWeight(
// but allow it at the lowest weight.
if (CallOperandVal == NULL)
return CW_Default;
- const Type *type = CallOperandVal->getType();
+ Type *type = CallOperandVal->getType();
// Look at the constraint type.
switch (*constraint) {
default:
@@ -5634,7 +5659,7 @@ void PPCTargetLowering::LowerAsmOperandForConstraint(SDValue Op,
// isLegalAddressingMode - Return true if the addressing mode represented
// by AM is legal for this target, for a load/store of the specified type.
bool PPCTargetLowering::isLegalAddressingMode(const AddrMode &AM,
- const Type *Ty) const {
+ Type *Ty) const {
// FIXME: PPC does not allow r+i addressing modes for vectors!
// PPC allows a sign-extended 16-bit immediate field.
@@ -5670,7 +5695,7 @@ bool PPCTargetLowering::isLegalAddressingMode(const AddrMode &AM,
/// isLegalAddressImmediate - Return true if the integer value can be used
/// as the offset of the target addressing mode for load / store of the
/// given type.
-bool PPCTargetLowering::isLegalAddressImmediate(int64_t V,const Type *Ty) const{
+bool PPCTargetLowering::isLegalAddressImmediate(int64_t V,Type *Ty) const{
// PPC allows a sign-extended 16-bit immediate field.
return (V > -(1 << 16) && V < (1 << 16)-1);
}
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCISelLowering.h b/contrib/llvm/lib/Target/PowerPC/PPCISelLowering.h
index 986b4e7..430e45e 100644
--- a/contrib/llvm/lib/Target/PowerPC/PPCISelLowering.h
+++ b/contrib/llvm/lib/Target/PowerPC/PPCISelLowering.h
@@ -246,7 +246,7 @@ namespace llvm {
virtual MVT getShiftAmountTy(EVT LHSTy) const { return MVT::i32; }
/// getSetCCResultType - Return the ISD::SETCC ValueType
- virtual MVT::SimpleValueType getSetCCResultType(EVT VT) const;
+ virtual EVT getSetCCResultType(EVT VT) const;
/// getPreIndexedAddressParts - returns true by value, base pointer and
/// offset pointer and addressing mode by reference if the node's address
@@ -323,7 +323,7 @@ namespace llvm {
/// getByValTypeAlignment - Return the desired alignment for ByVal aggregate
/// function arguments in the caller parameter area. This is the actual
/// alignment, not its logarithm.
- unsigned getByValTypeAlignment(const Type *Ty) const;
+ unsigned getByValTypeAlignment(Type *Ty) const;
/// LowerAsmOperandForConstraint - Lower the specified operand into the Ops
/// vector. If it is invalid, don't add anything to Ops.
@@ -334,12 +334,12 @@ namespace llvm {
/// isLegalAddressingMode - Return true if the addressing mode represented
/// by AM is legal for this target, for a load/store of the specified type.
- virtual bool isLegalAddressingMode(const AddrMode &AM, const Type *Ty)const;
+ virtual bool isLegalAddressingMode(const AddrMode &AM, Type *Ty)const;
/// isLegalAddressImmediate - Return true if the integer value can be used
/// as the offset of the target addressing mode for load / store of the
/// given type.
- virtual bool isLegalAddressImmediate(int64_t V, const Type *Ty) const;
+ virtual bool isLegalAddressImmediate(int64_t V, Type *Ty) const;
/// isLegalAddressImmediate - Return true if the GlobalValue can be used as
/// the offset of the target addressing mode.
@@ -390,7 +390,8 @@ namespace llvm {
SDValue LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerJumpTable(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerSETCC(SDValue Op, SelectionDAG &DAG) const;
- SDValue LowerTRAMPOLINE(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerINIT_TRAMPOLINE(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerADJUST_TRAMPOLINE(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerVASTART(SDValue Op, SelectionDAG &DAG,
const PPCSubtarget &Subtarget) const;
SDValue LowerVAARG(SDValue Op, SelectionDAG &DAG,
@@ -444,6 +445,12 @@ namespace llvm {
DebugLoc dl, SelectionDAG &DAG,
SmallVectorImpl<SDValue> &InVals) const;
+ virtual bool
+ CanLowerReturn(CallingConv::ID CallConv, MachineFunction &MF,
+ bool isVarArg,
+ const SmallVectorImpl<ISD::OutputArg> &Outs,
+ LLVMContext &Context) const;
+
virtual SDValue
LowerReturn(SDValue Chain,
CallingConv::ID CallConv, bool isVarArg,
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp b/contrib/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp
index 143444f..2bc109c 100644
--- a/contrib/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp
+++ b/contrib/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp
@@ -15,18 +15,18 @@
#include "PPC.h"
#include "PPCInstrBuilder.h"
#include "PPCMachineFunctionInfo.h"
-#include "PPCPredicates.h"
#include "PPCTargetMachine.h"
#include "PPCHazardRecognizers.h"
+#include "MCTargetDesc/PPCPredicates.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineMemOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/PseudoSourceValue.h"
#include "llvm/MC/MCAsmInfo.h"
-#include "llvm/Target/TargetRegistry.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/TargetRegistry.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/ADT/STLExtras.h"
@@ -334,7 +334,7 @@ PPCInstrInfo::StoreRegToStackSlot(MachineFunction &MF,
const TargetRegisterClass *RC,
SmallVectorImpl<MachineInstr*> &NewMIs) const{
DebugLoc DL;
- if (RC == PPC::GPRCRegisterClass) {
+ if (PPC::GPRCRegisterClass->hasSubClassEq(RC)) {
if (SrcReg != PPC::LR) {
NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::STW))
.addReg(SrcReg,
@@ -350,7 +350,7 @@ PPCInstrInfo::StoreRegToStackSlot(MachineFunction &MF,
getKillRegState(isKill)),
FrameIdx));
}
- } else if (RC == PPC::G8RCRegisterClass) {
+ } else if (PPC::G8RCRegisterClass->hasSubClassEq(RC)) {
if (SrcReg != PPC::LR8) {
NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::STD))
.addReg(SrcReg,
@@ -366,17 +366,17 @@ PPCInstrInfo::StoreRegToStackSlot(MachineFunction &MF,
getKillRegState(isKill)),
FrameIdx));
}
- } else if (RC == PPC::F8RCRegisterClass) {
+ } else if (PPC::F8RCRegisterClass->hasSubClassEq(RC)) {
NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::STFD))
.addReg(SrcReg,
getKillRegState(isKill)),
FrameIdx));
- } else if (RC == PPC::F4RCRegisterClass) {
+ } else if (PPC::F4RCRegisterClass->hasSubClassEq(RC)) {
NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::STFS))
.addReg(SrcReg,
getKillRegState(isKill)),
FrameIdx));
- } else if (RC == PPC::CRRCRegisterClass) {
+ } else if (PPC::CRRCRegisterClass->hasSubClassEq(RC)) {
if ((EnablePPC32RS && !TM.getSubtargetImpl()->isPPC64()) ||
(EnablePPC64RS && TM.getSubtargetImpl()->isPPC64())) {
// FIXME (64-bit): Enable
@@ -402,7 +402,7 @@ PPCInstrInfo::StoreRegToStackSlot(MachineFunction &MF,
// If the saved register wasn't CR0, shift the bits left so that they are
// in CR0's slot.
if (SrcReg != PPC::CR0) {
- unsigned ShiftBits = PPCRegisterInfo::getRegisterNumbering(SrcReg)*4;
+ unsigned ShiftBits = getPPCRegisterNumbering(SrcReg)*4;
// rlwinm scratch, scratch, ShiftBits, 0, 31.
NewMIs.push_back(BuildMI(MF, DL, get(PPC::RLWINM), ScratchReg)
.addReg(ScratchReg).addImm(ShiftBits)
@@ -414,7 +414,7 @@ PPCInstrInfo::StoreRegToStackSlot(MachineFunction &MF,
getKillRegState(isKill)),
FrameIdx));
}
- } else if (RC == PPC::CRBITRCRegisterClass) {
+ } else if (PPC::CRBITRCRegisterClass->hasSubClassEq(RC)) {
// FIXME: We use CRi here because there is no mtcrf on a bit. Since the
// backend currently only uses CR1EQ as an individual bit, this should
// not cause any bug. If we need other uses of CR bits, the following
@@ -448,7 +448,7 @@ PPCInstrInfo::StoreRegToStackSlot(MachineFunction &MF,
return StoreRegToStackSlot(MF, Reg, isKill, FrameIdx,
PPC::CRRCRegisterClass, NewMIs);
- } else if (RC == PPC::VRRCRegisterClass) {
+ } else if (PPC::VRRCRegisterClass->hasSubClassEq(RC)) {
// We don't have indexed addressing for vector loads. Emit:
// R0 = ADDI FI#
// STVX VAL, 0, R0
@@ -499,7 +499,7 @@ PPCInstrInfo::LoadRegFromStackSlot(MachineFunction &MF, DebugLoc DL,
unsigned DestReg, int FrameIdx,
const TargetRegisterClass *RC,
SmallVectorImpl<MachineInstr*> &NewMIs)const{
- if (RC == PPC::GPRCRegisterClass) {
+ if (PPC::GPRCRegisterClass->hasSubClassEq(RC)) {
if (DestReg != PPC::LR) {
NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::LWZ),
DestReg), FrameIdx));
@@ -508,7 +508,7 @@ PPCInstrInfo::LoadRegFromStackSlot(MachineFunction &MF, DebugLoc DL,
PPC::R11), FrameIdx));
NewMIs.push_back(BuildMI(MF, DL, get(PPC::MTLR)).addReg(PPC::R11));
}
- } else if (RC == PPC::G8RCRegisterClass) {
+ } else if (PPC::G8RCRegisterClass->hasSubClassEq(RC)) {
if (DestReg != PPC::LR8) {
NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::LD), DestReg),
FrameIdx));
@@ -517,13 +517,13 @@ PPCInstrInfo::LoadRegFromStackSlot(MachineFunction &MF, DebugLoc DL,
PPC::R11), FrameIdx));
NewMIs.push_back(BuildMI(MF, DL, get(PPC::MTLR8)).addReg(PPC::R11));
}
- } else if (RC == PPC::F8RCRegisterClass) {
+ } else if (PPC::F8RCRegisterClass->hasSubClassEq(RC)) {
NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::LFD), DestReg),
FrameIdx));
- } else if (RC == PPC::F4RCRegisterClass) {
+ } else if (PPC::F4RCRegisterClass->hasSubClassEq(RC)) {
NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::LFS), DestReg),
FrameIdx));
- } else if (RC == PPC::CRRCRegisterClass) {
+ } else if (PPC::CRRCRegisterClass->hasSubClassEq(RC)) {
// FIXME: We need a scatch reg here. The trouble with using R0 is that
// it's possible for the stack frame to be so big the save location is
// out of range of immediate offsets, necessitating another register.
@@ -537,7 +537,7 @@ PPCInstrInfo::LoadRegFromStackSlot(MachineFunction &MF, DebugLoc DL,
// If the reloaded register isn't CR0, shift the bits right so that they are
// in the right CR's slot.
if (DestReg != PPC::CR0) {
- unsigned ShiftBits = PPCRegisterInfo::getRegisterNumbering(DestReg)*4;
+ unsigned ShiftBits = getPPCRegisterNumbering(DestReg)*4;
// rlwinm r11, r11, 32-ShiftBits, 0, 31.
NewMIs.push_back(BuildMI(MF, DL, get(PPC::RLWINM), ScratchReg)
.addReg(ScratchReg).addImm(32-ShiftBits).addImm(0)
@@ -546,7 +546,7 @@ PPCInstrInfo::LoadRegFromStackSlot(MachineFunction &MF, DebugLoc DL,
NewMIs.push_back(BuildMI(MF, DL, get(PPC::MTCRF), DestReg)
.addReg(ScratchReg));
- } else if (RC == PPC::CRBITRCRegisterClass) {
+ } else if (PPC::CRBITRCRegisterClass->hasSubClassEq(RC)) {
unsigned Reg = 0;
if (DestReg == PPC::CR0LT || DestReg == PPC::CR0GT ||
@@ -577,7 +577,7 @@ PPCInstrInfo::LoadRegFromStackSlot(MachineFunction &MF, DebugLoc DL,
return LoadRegFromStackSlot(MF, DL, Reg, FrameIdx,
PPC::CRRCRegisterClass, NewMIs);
- } else if (RC == PPC::VRRCRegisterClass) {
+ } else if (PPC::VRRCRegisterClass->hasSubClassEq(RC)) {
// We don't have indexed addressing for vector loads. Emit:
// R0 = ADDI FI#
// Dest = LVX 0, R0
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCInstrInfo.td b/contrib/llvm/lib/Target/PowerPC/PPCInstrInfo.td
index 773578c..f248b5b 100644
--- a/contrib/llvm/lib/Target/PowerPC/PPCInstrInfo.td
+++ b/contrib/llvm/lib/Target/PowerPC/PPCInstrInfo.td
@@ -1053,6 +1053,10 @@ def CRSET : XLForm_1_ext<19, 289, (outs CRBITRC:$dst), (ins),
"creqv $dst, $dst, $dst", BrCR,
[]>;
+def CRUNSET: XLForm_1_ext<19, 193, (outs CRBITRC:$dst), (ins),
+ "crxor $dst, $dst, $dst", BrCR,
+ []>;
+
// XFX-Form instructions. Instructions that deal with SPRs.
//
let Uses = [CTR] in {
@@ -1472,5 +1476,7 @@ def : Pat<(membarrier (i32 imm /*ll*/),
(i32 imm /*device*/)),
(SYNC)>;
+def : Pat<(atomic_fence (imm), (imm)), (SYNC)>;
+
include "PPCInstrAltivec.td"
include "PPCInstr64Bit.td"
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCRegisterInfo.cpp b/contrib/llvm/lib/Target/PowerPC/PPCRegisterInfo.cpp
index 9c2428b..2e90b7a 100644
--- a/contrib/llvm/lib/Target/PowerPC/PPCRegisterInfo.cpp
+++ b/contrib/llvm/lib/Target/PowerPC/PPCRegisterInfo.cpp
@@ -28,7 +28,6 @@
#include "llvm/CodeGen/MachineModuleInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
-#include "llvm/CodeGen/MachineLocation.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/RegisterScavenging.h"
#include "llvm/Target/TargetFrameLowering.h"
@@ -68,52 +67,12 @@ PPCRegisterInfo::requiresRegisterScavenging(const MachineFunction &) const {
(EnablePPC64RS && Subtarget.isPPC64()));
}
-/// getRegisterNumbering - Given the enum value for some register, e.g.
-/// PPC::F14, return the number that it corresponds to (e.g. 14).
-unsigned PPCRegisterInfo::getRegisterNumbering(unsigned RegEnum) {
- using namespace PPC;
- switch (RegEnum) {
- case 0: return 0;
- case R0 : case X0 : case F0 : case V0 : case CR0: case CR0LT: return 0;
- case R1 : case X1 : case F1 : case V1 : case CR1: case CR0GT: return 1;
- case R2 : case X2 : case F2 : case V2 : case CR2: case CR0EQ: return 2;
- case R3 : case X3 : case F3 : case V3 : case CR3: case CR0UN: return 3;
- case R4 : case X4 : case F4 : case V4 : case CR4: case CR1LT: return 4;
- case R5 : case X5 : case F5 : case V5 : case CR5: case CR1GT: return 5;
- case R6 : case X6 : case F6 : case V6 : case CR6: case CR1EQ: return 6;
- case R7 : case X7 : case F7 : case V7 : case CR7: case CR1UN: return 7;
- case R8 : case X8 : case F8 : case V8 : case CR2LT: return 8;
- case R9 : case X9 : case F9 : case V9 : case CR2GT: return 9;
- case R10: case X10: case F10: case V10: case CR2EQ: return 10;
- case R11: case X11: case F11: case V11: case CR2UN: return 11;
- case R12: case X12: case F12: case V12: case CR3LT: return 12;
- case R13: case X13: case F13: case V13: case CR3GT: return 13;
- case R14: case X14: case F14: case V14: case CR3EQ: return 14;
- case R15: case X15: case F15: case V15: case CR3UN: return 15;
- case R16: case X16: case F16: case V16: case CR4LT: return 16;
- case R17: case X17: case F17: case V17: case CR4GT: return 17;
- case R18: case X18: case F18: case V18: case CR4EQ: return 18;
- case R19: case X19: case F19: case V19: case CR4UN: return 19;
- case R20: case X20: case F20: case V20: case CR5LT: return 20;
- case R21: case X21: case F21: case V21: case CR5GT: return 21;
- case R22: case X22: case F22: case V22: case CR5EQ: return 22;
- case R23: case X23: case F23: case V23: case CR5UN: return 23;
- case R24: case X24: case F24: case V24: case CR6LT: return 24;
- case R25: case X25: case F25: case V25: case CR6GT: return 25;
- case R26: case X26: case F26: case V26: case CR6EQ: return 26;
- case R27: case X27: case F27: case V27: case CR6UN: return 27;
- case R28: case X28: case F28: case V28: case CR7LT: return 28;
- case R29: case X29: case F29: case V29: case CR7GT: return 29;
- case R30: case X30: case F30: case V30: case CR7EQ: return 30;
- case R31: case X31: case F31: case V31: case CR7UN: return 31;
- default:
- llvm_unreachable("Unhandled reg in PPCRegisterInfo::getRegisterNumbering!");
- }
-}
-
PPCRegisterInfo::PPCRegisterInfo(const PPCSubtarget &ST,
const TargetInstrInfo &tii)
- : PPCGenRegisterInfo(), Subtarget(ST), TII(tii) {
+ : PPCGenRegisterInfo(ST.isPPC64() ? PPC::LR8 : PPC::LR,
+ ST.isPPC64() ? 0 : 1,
+ ST.isPPC64() ? 0 : 1),
+ Subtarget(ST), TII(tii) {
ImmToIdxMap[PPC::LD] = PPC::LDX; ImmToIdxMap[PPC::STD] = PPC::STDX;
ImmToIdxMap[PPC::LBZ] = PPC::LBZX; ImmToIdxMap[PPC::STB] = PPC::STBX;
ImmToIdxMap[PPC::LHZ] = PPC::LHZX; ImmToIdxMap[PPC::LHA] = PPC::LHAX;
@@ -519,7 +478,7 @@ void PPCRegisterInfo::lowerCRSpilling(MachineBasicBlock::iterator II,
// rlwinm rA, rA, ShiftBits, 0, 31.
BuildMI(MBB, II, dl, TII.get(PPC::RLWINM), Reg)
.addReg(Reg, RegState::Kill)
- .addImm(PPCRegisterInfo::getRegisterNumbering(SrcReg) * 4)
+ .addImm(getPPCRegisterNumbering(SrcReg) * 4)
.addImm(0)
.addImm(31);
@@ -668,10 +627,6 @@ PPCRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
MI.getOperand(OperandBase + 1).ChangeToRegister(SReg, false);
}
-unsigned PPCRegisterInfo::getRARegister() const {
- return !Subtarget.isPPC64() ? PPC::LR : PPC::LR8;
-}
-
unsigned PPCRegisterInfo::getFrameRegister(const MachineFunction &MF) const {
const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering();
@@ -688,27 +643,3 @@ unsigned PPCRegisterInfo::getEHExceptionRegister() const {
unsigned PPCRegisterInfo::getEHHandlerRegister() const {
return !Subtarget.isPPC64() ? PPC::R4 : PPC::X4;
}
-
-/// DWARFFlavour - Flavour of dwarf regnumbers
-///
-namespace DWARFFlavour {
- enum {
- PPC64 = 0, PPC32 = 1
- };
-}
-
-int PPCRegisterInfo::getDwarfRegNum(unsigned RegNum, bool isEH) const {
- // FIXME: Most probably dwarf numbers differs for Linux and Darwin
- unsigned Flavour = Subtarget.isPPC64() ?
- DWARFFlavour::PPC64 : DWARFFlavour::PPC32;
-
- return PPCGenRegisterInfo::getDwarfRegNumFull(RegNum, Flavour);
-}
-
-int PPCRegisterInfo::getLLVMRegNum(unsigned RegNum, bool isEH) const {
- // FIXME: Most probably dwarf numbers differs for Linux and Darwin
- unsigned Flavour = Subtarget.isPPC64() ?
- DWARFFlavour::PPC64 : DWARFFlavour::PPC32;
-
- return PPCGenRegisterInfo::getLLVMRegNumFull(RegNum, Flavour);
-}
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCRegisterInfo.h b/contrib/llvm/lib/Target/PowerPC/PPCRegisterInfo.h
index 33fe5eb..1cc7213 100644
--- a/contrib/llvm/lib/Target/PowerPC/PPCRegisterInfo.h
+++ b/contrib/llvm/lib/Target/PowerPC/PPCRegisterInfo.h
@@ -33,10 +33,6 @@ class PPCRegisterInfo : public PPCGenRegisterInfo {
public:
PPCRegisterInfo(const PPCSubtarget &SubTarget, const TargetInstrInfo &tii);
- /// getRegisterNumbering - Given the enum value for some register, e.g.
- /// PPC::F14, return the number that it corresponds to (e.g. 14).
- static unsigned getRegisterNumbering(unsigned RegEnum);
-
/// getPointerRegClass - Return the register class to use to hold pointers.
/// This is used for addressing modes.
virtual const TargetRegisterClass *getPointerRegClass(unsigned Kind=0) const;
@@ -62,15 +58,11 @@ public:
int SPAdj, RegScavenger *RS = NULL) const;
// Debug information queries.
- unsigned getRARegister() const;
unsigned getFrameRegister(const MachineFunction &MF) const;
// Exception handling queries.
unsigned getEHExceptionRegister() const;
unsigned getEHHandlerRegister() const;
-
- int getDwarfRegNum(unsigned RegNum, bool isEH) const;
- int getLLVMRegNum(unsigned RegNum, bool isEH) const;
};
} // end namespace llvm
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCSubtarget.cpp b/contrib/llvm/lib/Target/PowerPC/PPCSubtarget.cpp
index 5ea9b0f..cf194de 100644
--- a/contrib/llvm/lib/Target/PowerPC/PPCSubtarget.cpp
+++ b/contrib/llvm/lib/Target/PowerPC/PPCSubtarget.cpp
@@ -15,7 +15,7 @@
#include "PPC.h"
#include "llvm/GlobalValue.h"
#include "llvm/Target/TargetMachine.h"
-#include "llvm/Target/TargetRegistry.h"
+#include "llvm/Support/TargetRegistry.h"
#include <cstdlib>
#define GET_SUBTARGETINFO_TARGET_DESC
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCTargetMachine.cpp b/contrib/llvm/lib/Target/PowerPC/PPCTargetMachine.cpp
index e0ea5ad..f5744b8 100644
--- a/contrib/llvm/lib/Target/PowerPC/PPCTargetMachine.cpp
+++ b/contrib/llvm/lib/Target/PowerPC/PPCTargetMachine.cpp
@@ -16,76 +16,43 @@
#include "llvm/PassManager.h"
#include "llvm/MC/MCStreamer.h"
#include "llvm/Target/TargetOptions.h"
-#include "llvm/Target/TargetRegistry.h"
#include "llvm/Support/FormattedStream.h"
+#include "llvm/Support/TargetRegistry.h"
using namespace llvm;
-// This is duplicated code. Refactor this.
-static MCStreamer *createMCStreamer(const Target &T, const std::string &TT,
- MCContext &Ctx, TargetAsmBackend &TAB,
- raw_ostream &OS,
- MCCodeEmitter *Emitter,
- bool RelaxAll,
- bool NoExecStack) {
- if (Triple(TT).isOSDarwin())
- return createMachOStreamer(Ctx, TAB, OS, Emitter, RelaxAll);
-
- return NULL;
-}
-
extern "C" void LLVMInitializePowerPCTarget() {
// Register the targets
RegisterTargetMachine<PPC32TargetMachine> A(ThePPC32Target);
RegisterTargetMachine<PPC64TargetMachine> B(ThePPC64Target);
-
- // Register the MC Code Emitter
- TargetRegistry::RegisterCodeEmitter(ThePPC32Target, createPPCMCCodeEmitter);
- TargetRegistry::RegisterCodeEmitter(ThePPC64Target, createPPCMCCodeEmitter);
-
-
- // Register the asm backend.
- TargetRegistry::RegisterAsmBackend(ThePPC32Target, createPPCAsmBackend);
- TargetRegistry::RegisterAsmBackend(ThePPC64Target, createPPCAsmBackend);
-
- // Register the object streamer.
- TargetRegistry::RegisterObjectStreamer(ThePPC32Target, createMCStreamer);
- TargetRegistry::RegisterObjectStreamer(ThePPC64Target, createMCStreamer);
}
-
-PPCTargetMachine::PPCTargetMachine(const Target &T, const std::string &TT,
- const std::string &CPU,
- const std::string &FS, bool is64Bit)
- : LLVMTargetMachine(T, TT, CPU, FS),
+PPCTargetMachine::PPCTargetMachine(const Target &T, StringRef TT,
+ StringRef CPU, StringRef FS,
+ Reloc::Model RM, CodeModel::Model CM,
+ bool is64Bit)
+ : LLVMTargetMachine(T, TT, CPU, FS, RM, CM),
Subtarget(TT, CPU, FS, is64Bit),
DataLayout(Subtarget.getTargetDataString()), InstrInfo(*this),
FrameLowering(Subtarget), JITInfo(*this, is64Bit),
TLInfo(*this), TSInfo(*this),
InstrItins(Subtarget.getInstrItineraryData()) {
-
- if (getRelocationModel() == Reloc::Default) {
- if (Subtarget.isDarwin())
- setRelocationModel(Reloc::DynamicNoPIC);
- else
- setRelocationModel(Reloc::Static);
- }
}
/// Override this for PowerPC. Tail merging happily breaks up instruction issue
/// groups, which typically degrades performance.
bool PPCTargetMachine::getEnableTailMergeDefault() const { return false; }
-PPC32TargetMachine::PPC32TargetMachine(const Target &T, const std::string &TT,
- const std::string &CPU,
- const std::string &FS)
- : PPCTargetMachine(T, TT, CPU, FS, false) {
+PPC32TargetMachine::PPC32TargetMachine(const Target &T, StringRef TT,
+ StringRef CPU, StringRef FS,
+ Reloc::Model RM, CodeModel::Model CM)
+ : PPCTargetMachine(T, TT, CPU, FS, RM, CM, false) {
}
-PPC64TargetMachine::PPC64TargetMachine(const Target &T, const std::string &TT,
- const std::string &CPU,
- const std::string &FS)
- : PPCTargetMachine(T, TT, CPU, FS, true) {
+PPC64TargetMachine::PPC64TargetMachine(const Target &T, StringRef TT,
+ StringRef CPU, StringRef FS,
+ Reloc::Model RM, CodeModel::Model CM)
+ : PPCTargetMachine(T, TT, CPU, FS, RM, CM, true) {
}
@@ -110,19 +77,11 @@ bool PPCTargetMachine::addPreEmitPass(PassManagerBase &PM,
bool PPCTargetMachine::addCodeEmitter(PassManagerBase &PM,
CodeGenOpt::Level OptLevel,
JITCodeEmitter &JCE) {
- // The JIT should use the static relocation model in ppc32 mode, PIC in ppc64.
// FIXME: This should be moved to TargetJITInfo!!
- if (Subtarget.isPPC64()) {
- // We use PIC codegen in ppc64 mode, because otherwise we'd have to use many
- // instructions to materialize arbitrary global variable + function +
- // constant pool addresses.
- setRelocationModel(Reloc::PIC_);
+ if (Subtarget.isPPC64())
// Temporary workaround for the inability of PPC64 JIT to handle jump
// tables.
DisableJumpTables = true;
- } else {
- setRelocationModel(Reloc::Static);
- }
// Inform the subtarget that we are in JIT mode. FIXME: does this break macho
// writing?
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCTargetMachine.h b/contrib/llvm/lib/Target/PowerPC/PPCTargetMachine.h
index baf07e3..d06f084 100644
--- a/contrib/llvm/lib/Target/PowerPC/PPCTargetMachine.h
+++ b/contrib/llvm/lib/Target/PowerPC/PPCTargetMachine.h
@@ -40,9 +40,9 @@ class PPCTargetMachine : public LLVMTargetMachine {
InstrItineraryData InstrItins;
public:
- PPCTargetMachine(const Target &T, const std::string &TT,
- const std::string &CPU, const std::string &FS,
- bool is64Bit);
+ PPCTargetMachine(const Target &T, StringRef TT,
+ StringRef CPU, StringRef FS,
+ Reloc::Model RM, CodeModel::Model CM, bool is64Bit);
virtual const PPCInstrInfo *getInstrInfo() const { return &InstrInfo; }
virtual const PPCFrameLowering *getFrameLowering() const {
@@ -77,16 +77,18 @@ public:
///
class PPC32TargetMachine : public PPCTargetMachine {
public:
- PPC32TargetMachine(const Target &T, const std::string &TT,
- const std::string &CPU, const std::string &FS);
+ PPC32TargetMachine(const Target &T, StringRef TT,
+ StringRef CPU, StringRef FS,
+ Reloc::Model RM, CodeModel::Model CM);
};
/// PPC64TargetMachine - PowerPC 64-bit target machine.
///
class PPC64TargetMachine : public PPCTargetMachine {
public:
- PPC64TargetMachine(const Target &T, const std::string &TT,
- const std::string &CPU, const std::string &FS);
+ PPC64TargetMachine(const Target &T, StringRef TT,
+ StringRef CPU, StringRef FS,
+ Reloc::Model RM, CodeModel::Model CM);
};
} // end namespace llvm
diff --git a/contrib/llvm/lib/Target/PowerPC/TargetInfo/PowerPCTargetInfo.cpp b/contrib/llvm/lib/Target/PowerPC/TargetInfo/PowerPCTargetInfo.cpp
index ad607d0..5dc8568 100644
--- a/contrib/llvm/lib/Target/PowerPC/TargetInfo/PowerPCTargetInfo.cpp
+++ b/contrib/llvm/lib/Target/PowerPC/TargetInfo/PowerPCTargetInfo.cpp
@@ -9,7 +9,7 @@
#include "PPC.h"
#include "llvm/Module.h"
-#include "llvm/Target/TargetRegistry.h"
+#include "llvm/Support/TargetRegistry.h"
using namespace llvm;
Target llvm::ThePPC32Target, llvm::ThePPC64Target;
diff --git a/contrib/llvm/lib/Target/Sparc/MCTargetDesc/CMakeLists.txt b/contrib/llvm/lib/Target/Sparc/MCTargetDesc/CMakeLists.txt
deleted file mode 100644
index 1e8c029..0000000
--- a/contrib/llvm/lib/Target/Sparc/MCTargetDesc/CMakeLists.txt
+++ /dev/null
@@ -1,4 +0,0 @@
-add_llvm_library(LLVMSparcDesc
- SparcMCTargetDesc.cpp
- SparcMCAsmInfo.cpp
- )
diff --git a/contrib/llvm/lib/Target/Sparc/MCTargetDesc/Makefile b/contrib/llvm/lib/Target/Sparc/MCTargetDesc/Makefile
deleted file mode 100644
index abcbe2d..0000000
--- a/contrib/llvm/lib/Target/Sparc/MCTargetDesc/Makefile
+++ /dev/null
@@ -1,16 +0,0 @@
-##===- lib/Target/Sparc/TargetDesc/Makefile ----------------*- Makefile -*-===##
-#
-# The LLVM Compiler Infrastructure
-#
-# This file is distributed under the University of Illinois Open Source
-# License. See LICENSE.TXT for details.
-#
-##===----------------------------------------------------------------------===##
-
-LEVEL = ../../../..
-LIBRARYNAME = LLVMSparcDesc
-
-# Hack: we need to include 'main' target directory to grab private headers
-CPP.Flags += -I$(PROJ_OBJ_DIR)/.. -I$(PROJ_SRC_DIR)/..
-
-include $(LEVEL)/Makefile.common
diff --git a/contrib/llvm/lib/Target/Sparc/MCTargetDesc/SparcMCTargetDesc.cpp b/contrib/llvm/lib/Target/Sparc/MCTargetDesc/SparcMCTargetDesc.cpp
index cb92a2b..cb2a7df 100644
--- a/contrib/llvm/lib/Target/Sparc/MCTargetDesc/SparcMCTargetDesc.cpp
+++ b/contrib/llvm/lib/Target/Sparc/MCTargetDesc/SparcMCTargetDesc.cpp
@@ -13,10 +13,11 @@
#include "SparcMCTargetDesc.h"
#include "SparcMCAsmInfo.h"
+#include "llvm/MC/MCCodeGenInfo.h"
#include "llvm/MC/MCInstrInfo.h"
#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/MC/MCSubtargetInfo.h"
-#include "llvm/Target/TargetRegistry.h"
+#include "llvm/Support/TargetRegistry.h"
#define GET_INSTRINFO_MC_DESC
#include "SparcGenInstrInfo.inc"
@@ -35,8 +36,10 @@ static MCInstrInfo *createSparcMCInstrInfo() {
return X;
}
-extern "C" void LLVMInitializeSparcMCInstrInfo() {
- TargetRegistry::RegisterMCInstrInfo(TheSparcTarget, createSparcMCInstrInfo);
+static MCRegisterInfo *createSparcMCRegisterInfo(StringRef TT) {
+ MCRegisterInfo *X = new MCRegisterInfo();
+ InitSparcMCRegisterInfo(X, SP::I7);
+ return X;
}
static MCSubtargetInfo *createSparcMCSubtargetInfo(StringRef TT, StringRef CPU,
@@ -46,12 +49,31 @@ static MCSubtargetInfo *createSparcMCSubtargetInfo(StringRef TT, StringRef CPU,
return X;
}
-extern "C" void LLVMInitializeSparcMCSubtargetInfo() {
- TargetRegistry::RegisterMCSubtargetInfo(TheSparcTarget,
- createSparcMCSubtargetInfo);
+static MCCodeGenInfo *createSparcMCCodeGenInfo(StringRef TT, Reloc::Model RM,
+ CodeModel::Model CM) {
+ MCCodeGenInfo *X = new MCCodeGenInfo();
+ X->InitMCCodeGenInfo(RM, CM);
+ return X;
}
-extern "C" void LLVMInitializeSparcMCAsmInfo() {
+extern "C" void LLVMInitializeSparcTargetMC() {
+ // Register the MC asm info.
RegisterMCAsmInfo<SparcELFMCAsmInfo> X(TheSparcTarget);
RegisterMCAsmInfo<SparcELFMCAsmInfo> Y(TheSparcV9Target);
+
+ // Register the MC codegen info.
+ TargetRegistry::RegisterMCCodeGenInfo(TheSparcTarget,
+ createSparcMCCodeGenInfo);
+ TargetRegistry::RegisterMCCodeGenInfo(TheSparcV9Target,
+ createSparcMCCodeGenInfo);
+
+ // Register the MC instruction info.
+ TargetRegistry::RegisterMCInstrInfo(TheSparcTarget, createSparcMCInstrInfo);
+
+ // Register the MC register info.
+ TargetRegistry::RegisterMCRegInfo(TheSparcTarget, createSparcMCRegisterInfo);
+
+ // Register the MC subtarget info.
+ TargetRegistry::RegisterMCSubtargetInfo(TheSparcTarget,
+ createSparcMCSubtargetInfo);
}
diff --git a/contrib/llvm/lib/Target/Sparc/SparcAsmPrinter.cpp b/contrib/llvm/lib/Target/Sparc/SparcAsmPrinter.cpp
index edde842..345e1bc 100644
--- a/contrib/llvm/lib/Target/Sparc/SparcAsmPrinter.cpp
+++ b/contrib/llvm/lib/Target/Sparc/SparcAsmPrinter.cpp
@@ -22,9 +22,9 @@
#include "llvm/MC/MCStreamer.h"
#include "llvm/MC/MCSymbol.h"
#include "llvm/Target/Mangler.h"
-#include "llvm/Target/TargetRegistry.h"
#include "llvm/ADT/SmallString.h"
#include "llvm/ADT/StringExtras.h"
+#include "llvm/Support/TargetRegistry.h"
#include "llvm/Support/raw_ostream.h"
using namespace llvm;
diff --git a/contrib/llvm/lib/Target/Sparc/SparcISelLowering.cpp b/contrib/llvm/lib/Target/Sparc/SparcISelLowering.cpp
index 6f30d3f..d70b163 100644
--- a/contrib/llvm/lib/Target/Sparc/SparcISelLowering.cpp
+++ b/contrib/llvm/lib/Target/Sparc/SparcISelLowering.cpp
@@ -631,8 +631,8 @@ SparcTargetLowering::getSRetArgSize(SelectionDAG &DAG, SDValue Callee) const
assert(CalleeFn->hasStructRetAttr() &&
"Callee does not have the StructRet attribute.");
- const PointerType *Ty = cast<PointerType>(CalleeFn->arg_begin()->getType());
- const Type *ElementTy = Ty->getElementType();
+ PointerType *Ty = cast<PointerType>(CalleeFn->arg_begin()->getType());
+ Type *ElementTy = Ty->getElementType();
return getTargetData()->getTypeAllocSize(ElementTy);
}
@@ -748,8 +748,10 @@ SparcTargetLowering::SparcTargetLowering(TargetMachine &TM)
setOperationAction(ISD::SELECT_CC, MVT::f32, Custom);
setOperationAction(ISD::SELECT_CC, MVT::f64, Custom);
- // SPARC has no intrinsics for these particular operations.
+ // FIXME: There are instructions available for ATOMIC_FENCE
+ // on SparcV8 and later.
setOperationAction(ISD::MEMBARRIER, MVT::Other, Expand);
+ setOperationAction(ISD::ATOMIC_FENCE, MVT::Other, Expand);
setOperationAction(ISD::FSIN , MVT::f64, Expand);
setOperationAction(ISD::FCOS , MVT::f64, Expand);
diff --git a/contrib/llvm/lib/Target/Sparc/SparcInstrInfo.cpp b/contrib/llvm/lib/Target/Sparc/SparcInstrInfo.cpp
index 4e3ddf8..7a6bf50 100644
--- a/contrib/llvm/lib/Target/Sparc/SparcInstrInfo.cpp
+++ b/contrib/llvm/lib/Target/Sparc/SparcInstrInfo.cpp
@@ -17,8 +17,8 @@
#include "SparcSubtarget.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
-#include "llvm/Target/TargetRegistry.h"
#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/TargetRegistry.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallVector.h"
diff --git a/contrib/llvm/lib/Target/Sparc/SparcRegisterInfo.cpp b/contrib/llvm/lib/Target/Sparc/SparcRegisterInfo.cpp
index 0acdd2c..8c16251 100644
--- a/contrib/llvm/lib/Target/Sparc/SparcRegisterInfo.cpp
+++ b/contrib/llvm/lib/Target/Sparc/SparcRegisterInfo.cpp
@@ -17,7 +17,6 @@
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
-#include "llvm/CodeGen/MachineLocation.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Type.h"
@@ -31,7 +30,7 @@ using namespace llvm;
SparcRegisterInfo::SparcRegisterInfo(SparcSubtarget &st,
const TargetInstrInfo &tii)
- : SparcGenRegisterInfo(), Subtarget(st), TII(tii) {
+ : SparcGenRegisterInfo(SP::I7), Subtarget(st), TII(tii) {
}
const unsigned* SparcRegisterInfo::getCalleeSavedRegs(const MachineFunction *MF)
@@ -113,10 +112,6 @@ SparcRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
void SparcRegisterInfo::
processFunctionBeforeFrameFinalized(MachineFunction &MF) const {}
-unsigned SparcRegisterInfo::getRARegister() const {
- return SP::I7;
-}
-
unsigned SparcRegisterInfo::getFrameRegister(const MachineFunction &MF) const {
return SP::I6;
}
@@ -130,11 +125,3 @@ unsigned SparcRegisterInfo::getEHHandlerRegister() const {
llvm_unreachable("What is the exception handler register");
return 0;
}
-
-int SparcRegisterInfo::getDwarfRegNum(unsigned RegNum, bool isEH) const {
- return SparcGenRegisterInfo::getDwarfRegNumFull(RegNum, 0);
-}
-
-int SparcRegisterInfo::getLLVMRegNum(unsigned DwarfRegNo, bool isEH) const {
- return SparcGenRegisterInfo::getLLVMRegNumFull(DwarfRegNo,0);
-}
diff --git a/contrib/llvm/lib/Target/Sparc/SparcRegisterInfo.h b/contrib/llvm/lib/Target/Sparc/SparcRegisterInfo.h
index ec9e63a..f845667 100644
--- a/contrib/llvm/lib/Target/Sparc/SparcRegisterInfo.h
+++ b/contrib/llvm/lib/Target/Sparc/SparcRegisterInfo.h
@@ -46,15 +46,11 @@ struct SparcRegisterInfo : public SparcGenRegisterInfo {
void processFunctionBeforeFrameFinalized(MachineFunction &MF) const;
// Debug information queries.
- unsigned getRARegister() const;
unsigned getFrameRegister(const MachineFunction &MF) const;
// Exception handling queries.
unsigned getEHExceptionRegister() const;
unsigned getEHHandlerRegister() const;
-
- int getDwarfRegNum(unsigned RegNum, bool isEH) const;
- int getLLVMRegNum(unsigned RegNum, bool isEH) const;
};
} // end namespace llvm
diff --git a/contrib/llvm/lib/Target/Sparc/SparcSubtarget.cpp b/contrib/llvm/lib/Target/Sparc/SparcSubtarget.cpp
index de647e8..6c501cf 100644
--- a/contrib/llvm/lib/Target/Sparc/SparcSubtarget.cpp
+++ b/contrib/llvm/lib/Target/Sparc/SparcSubtarget.cpp
@@ -13,7 +13,7 @@
#include "SparcSubtarget.h"
#include "Sparc.h"
-#include "llvm/Target/TargetRegistry.h"
+#include "llvm/Support/TargetRegistry.h"
#define GET_SUBTARGETINFO_TARGET_DESC
#define GET_SUBTARGETINFO_CTOR
diff --git a/contrib/llvm/lib/Target/Sparc/SparcTargetMachine.cpp b/contrib/llvm/lib/Target/Sparc/SparcTargetMachine.cpp
index cbe6d87..3d7b4a4 100644
--- a/contrib/llvm/lib/Target/Sparc/SparcTargetMachine.cpp
+++ b/contrib/llvm/lib/Target/Sparc/SparcTargetMachine.cpp
@@ -13,7 +13,7 @@
#include "Sparc.h"
#include "SparcTargetMachine.h"
#include "llvm/PassManager.h"
-#include "llvm/Target/TargetRegistry.h"
+#include "llvm/Support/TargetRegistry.h"
using namespace llvm;
extern "C" void LLVMInitializeSparcTarget() {
@@ -24,10 +24,11 @@ extern "C" void LLVMInitializeSparcTarget() {
/// SparcTargetMachine ctor - Create an ILP32 architecture model
///
-SparcTargetMachine::SparcTargetMachine(const Target &T, const std::string &TT,
- const std::string &CPU,
- const std::string &FS, bool is64bit)
- : LLVMTargetMachine(T, TT, CPU, FS),
+SparcTargetMachine::SparcTargetMachine(const Target &T, StringRef TT,
+ StringRef CPU, StringRef FS,
+ Reloc::Model RM, CodeModel::Model CM,
+ bool is64bit)
+ : LLVMTargetMachine(T, TT, CPU, FS, RM, CM),
Subtarget(TT, CPU, FS, is64bit),
DataLayout(Subtarget.getDataLayout()),
TLInfo(*this), TSInfo(*this), InstrInfo(Subtarget),
@@ -51,15 +52,15 @@ bool SparcTargetMachine::addPreEmitPass(PassManagerBase &PM,
}
SparcV8TargetMachine::SparcV8TargetMachine(const Target &T,
- const std::string &TT,
- const std::string &CPU,
- const std::string &FS)
- : SparcTargetMachine(T, TT, CPU, FS, false) {
+ StringRef TT, StringRef CPU,
+ StringRef FS, Reloc::Model RM,
+ CodeModel::Model CM)
+ : SparcTargetMachine(T, TT, CPU, FS, RM, CM, false) {
}
SparcV9TargetMachine::SparcV9TargetMachine(const Target &T,
- const std::string &TT,
- const std::string &CPU,
- const std::string &FS)
- : SparcTargetMachine(T, TT, CPU, FS, true) {
+ StringRef TT, StringRef CPU,
+ StringRef FS, Reloc::Model RM,
+ CodeModel::Model CM)
+ : SparcTargetMachine(T, TT, CPU, FS, RM, CM, true) {
}
diff --git a/contrib/llvm/lib/Target/Sparc/SparcTargetMachine.h b/contrib/llvm/lib/Target/Sparc/SparcTargetMachine.h
index 799fc49..3c907dd 100644
--- a/contrib/llvm/lib/Target/Sparc/SparcTargetMachine.h
+++ b/contrib/llvm/lib/Target/Sparc/SparcTargetMachine.h
@@ -33,9 +33,9 @@ class SparcTargetMachine : public LLVMTargetMachine {
SparcInstrInfo InstrInfo;
SparcFrameLowering FrameLowering;
public:
- SparcTargetMachine(const Target &T, const std::string &TT,
- const std::string &CPU, const std::string &FS,
- bool is64bit);
+ SparcTargetMachine(const Target &T, StringRef TT,
+ StringRef CPU, StringRef FS,
+ Reloc::Model RM, CodeModel::Model CM, bool is64bit);
virtual const SparcInstrInfo *getInstrInfo() const { return &InstrInfo; }
virtual const TargetFrameLowering *getFrameLowering() const {
@@ -62,16 +62,18 @@ public:
///
class SparcV8TargetMachine : public SparcTargetMachine {
public:
- SparcV8TargetMachine(const Target &T, const std::string &TT,
- const std::string &CPU, const std::string &FS);
+ SparcV8TargetMachine(const Target &T, StringRef TT,
+ StringRef CPU, StringRef FS,
+ Reloc::Model RM, CodeModel::Model CM);
};
/// SparcV9TargetMachine - Sparc 64-bit target machine
///
class SparcV9TargetMachine : public SparcTargetMachine {
public:
- SparcV9TargetMachine(const Target &T, const std::string &TT,
- const std::string &CPU, const std::string &FS);
+ SparcV9TargetMachine(const Target &T, StringRef TT,
+ StringRef CPU, StringRef FS,
+ Reloc::Model RM, CodeModel::Model CM);
};
} // end namespace llvm
diff --git a/contrib/llvm/lib/Target/Sparc/TargetInfo/SparcTargetInfo.cpp b/contrib/llvm/lib/Target/Sparc/TargetInfo/SparcTargetInfo.cpp
index 5c06f07..c9d5b7b 100644
--- a/contrib/llvm/lib/Target/Sparc/TargetInfo/SparcTargetInfo.cpp
+++ b/contrib/llvm/lib/Target/Sparc/TargetInfo/SparcTargetInfo.cpp
@@ -9,7 +9,7 @@
#include "Sparc.h"
#include "llvm/Module.h"
-#include "llvm/Target/TargetRegistry.h"
+#include "llvm/Support/TargetRegistry.h"
using namespace llvm;
Target llvm::TheSparcTarget;
diff --git a/contrib/llvm/lib/Target/SystemZ/MCTargetDesc/CMakeLists.txt b/contrib/llvm/lib/Target/SystemZ/MCTargetDesc/CMakeLists.txt
deleted file mode 100644
index 2ac9016..0000000
--- a/contrib/llvm/lib/Target/SystemZ/MCTargetDesc/CMakeLists.txt
+++ /dev/null
@@ -1,7 +0,0 @@
-add_llvm_library(LLVMSystemZDesc
- SystemZMCTargetDesc.cpp
- SystemZMCAsmInfo.cpp
- )
-
-# Hack: we need to include 'main' target directory to grab private headers
-include_directories(${CMAKE_CURRENT_SOURCE_DIR}/.. ${CMAKE_CURRENT_BINARY_DIR}/..)
diff --git a/contrib/llvm/lib/Target/SystemZ/MCTargetDesc/Makefile b/contrib/llvm/lib/Target/SystemZ/MCTargetDesc/Makefile
deleted file mode 100644
index 08f1a9d..0000000
--- a/contrib/llvm/lib/Target/SystemZ/MCTargetDesc/Makefile
+++ /dev/null
@@ -1,16 +0,0 @@
-##===- lib/Target/SystemZ/TargetDesc/Makefile --------------*- Makefile -*-===##
-#
-# The LLVM Compiler Infrastructure
-#
-# This file is distributed under the University of Illinois Open Source
-# License. See LICENSE.TXT for details.
-#
-##===----------------------------------------------------------------------===##
-
-LEVEL = ../../../..
-LIBRARYNAME = LLVMSystemZDesc
-
-# Hack: we need to include 'main' target directory to grab private headers
-CPP.Flags += -I$(PROJ_OBJ_DIR)/.. -I$(PROJ_SRC_DIR)/..
-
-include $(LEVEL)/Makefile.common
diff --git a/contrib/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCTargetDesc.cpp b/contrib/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCTargetDesc.cpp
index 5a826a6..23fb1e0 100644
--- a/contrib/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCTargetDesc.cpp
+++ b/contrib/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCTargetDesc.cpp
@@ -13,10 +13,11 @@
#include "SystemZMCTargetDesc.h"
#include "SystemZMCAsmInfo.h"
+#include "llvm/MC/MCCodeGenInfo.h"
#include "llvm/MC/MCInstrInfo.h"
#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/MC/MCSubtargetInfo.h"
-#include "llvm/Target/TargetRegistry.h"
+#include "llvm/Support/TargetRegistry.h"
#define GET_INSTRINFO_MC_DESC
#include "SystemZGenInstrInfo.inc"
@@ -35,9 +36,10 @@ static MCInstrInfo *createSystemZMCInstrInfo() {
return X;
}
-extern "C" void LLVMInitializeSystemZMCInstrInfo() {
- TargetRegistry::RegisterMCInstrInfo(TheSystemZTarget,
- createSystemZMCInstrInfo);
+static MCRegisterInfo *createSystemZMCRegisterInfo(StringRef TT) {
+ MCRegisterInfo *X = new MCRegisterInfo();
+ InitSystemZMCRegisterInfo(X, 0);
+ return X;
}
static MCSubtargetInfo *createSystemZMCSubtargetInfo(StringRef TT,
@@ -48,11 +50,32 @@ static MCSubtargetInfo *createSystemZMCSubtargetInfo(StringRef TT,
return X;
}
-extern "C" void LLVMInitializeSystemZMCSubtargetInfo() {
- TargetRegistry::RegisterMCSubtargetInfo(TheSystemZTarget,
- createSystemZMCSubtargetInfo);
+static MCCodeGenInfo *createSystemZMCCodeGenInfo(StringRef TT, Reloc::Model RM,
+ CodeModel::Model CM) {
+ MCCodeGenInfo *X = new MCCodeGenInfo();
+ if (RM == Reloc::Default)
+ RM = Reloc::Static;
+ X->InitMCCodeGenInfo(RM, CM);
+ return X;
}
-extern "C" void LLVMInitializeSystemZMCAsmInfo() {
+extern "C" void LLVMInitializeSystemZTargetMC() {
+ // Register the MC asm info.
RegisterMCAsmInfo<SystemZMCAsmInfo> X(TheSystemZTarget);
+
+ // Register the MC codegen info.
+ TargetRegistry::RegisterMCCodeGenInfo(TheSystemZTarget,
+ createSystemZMCCodeGenInfo);
+
+ // Register the MC instruction info.
+ TargetRegistry::RegisterMCInstrInfo(TheSystemZTarget,
+ createSystemZMCInstrInfo);
+
+ // Register the MC register info.
+ TargetRegistry::RegisterMCRegInfo(TheSystemZTarget,
+ createSystemZMCRegisterInfo);
+
+ // Register the MC subtarget info.
+ TargetRegistry::RegisterMCSubtargetInfo(TheSystemZTarget,
+ createSystemZMCSubtargetInfo);
}
diff --git a/contrib/llvm/lib/Target/SystemZ/SystemZAsmPrinter.cpp b/contrib/llvm/lib/Target/SystemZ/SystemZAsmPrinter.cpp
index fd4d8b7..43dcdfc 100644
--- a/contrib/llvm/lib/Target/SystemZ/SystemZAsmPrinter.cpp
+++ b/contrib/llvm/lib/Target/SystemZ/SystemZAsmPrinter.cpp
@@ -28,10 +28,8 @@
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCSymbol.h"
#include "llvm/Target/Mangler.h"
-#include "llvm/Target/TargetData.h"
-#include "llvm/Target/TargetLoweringObjectFile.h"
-#include "llvm/Target/TargetRegistry.h"
#include "llvm/ADT/SmallString.h"
+#include "llvm/Support/TargetRegistry.h"
#include "llvm/Support/raw_ostream.h"
using namespace llvm;
diff --git a/contrib/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp b/contrib/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp
index 871c297..48ca99f 100644
--- a/contrib/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp
+++ b/contrib/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp
@@ -81,6 +81,7 @@ SystemZTargetLowering::SystemZTargetLowering(SystemZTargetMachine &tm) :
setSchedulingPreference(Sched::RegPressure);
setBooleanContents(ZeroOrOneBooleanContent);
+ setBooleanVectorContents(ZeroOrOneBooleanContent); // FIXME: Is this correct?
setOperationAction(ISD::BR_JT, MVT::Other, Expand);
setOperationAction(ISD::BRCOND, MVT::Other, Expand);
diff --git a/contrib/llvm/lib/Target/SystemZ/SystemZInstrInfo.cpp b/contrib/llvm/lib/Target/SystemZ/SystemZInstrInfo.cpp
index 99e2730..5f3dd80 100644
--- a/contrib/llvm/lib/Target/SystemZ/SystemZInstrInfo.cpp
+++ b/contrib/llvm/lib/Target/SystemZ/SystemZInstrInfo.cpp
@@ -21,8 +21,8 @@
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/PseudoSourceValue.h"
-#include "llvm/Target/TargetRegistry.h"
#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/TargetRegistry.h"
#define GET_INSTRINFO_CTOR
#include "SystemZGenInstrInfo.inc"
diff --git a/contrib/llvm/lib/Target/SystemZ/SystemZInstrInfo.td b/contrib/llvm/lib/Target/SystemZ/SystemZInstrInfo.td
index 11a39fc..580d65b 100644
--- a/contrib/llvm/lib/Target/SystemZ/SystemZInstrInfo.td
+++ b/contrib/llvm/lib/Target/SystemZ/SystemZInstrInfo.td
@@ -478,7 +478,7 @@ def MOV64rmm : RSYI<0x04EB,
"lmg\t{$from, $to, $dst}",
[]>;
-let isReMaterializable = 1, isAsCheapAsAMove = 1,
+let isReMaterializable = 1, neverHasSideEffects = 1, isAsCheapAsAMove = 1,
Constraints = "$src = $dst" in {
def MOV64Pr0_even : Pseudo<(outs GR64P:$dst), (ins GR64P:$src),
"lhi\t${dst:subreg_even}, 0",
diff --git a/contrib/llvm/lib/Target/SystemZ/SystemZRegisterInfo.cpp b/contrib/llvm/lib/Target/SystemZ/SystemZRegisterInfo.cpp
index 59692e8..b1050d4 100644
--- a/contrib/llvm/lib/Target/SystemZ/SystemZRegisterInfo.cpp
+++ b/contrib/llvm/lib/Target/SystemZ/SystemZRegisterInfo.cpp
@@ -33,7 +33,7 @@ using namespace llvm;
SystemZRegisterInfo::SystemZRegisterInfo(SystemZTargetMachine &tm,
const SystemZInstrInfo &tii)
- : SystemZGenRegisterInfo(), TM(tm), TII(tii) {
+ : SystemZGenRegisterInfo(0), TM(tm), TII(tii) {
}
const unsigned*
@@ -126,11 +126,6 @@ SystemZRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
MI.getOperand(i+1).ChangeToImmediate(Offset);
}
-unsigned SystemZRegisterInfo::getRARegister() const {
- assert(0 && "What is the return address register");
- return 0;
-}
-
unsigned
SystemZRegisterInfo::getFrameRegister(const MachineFunction &MF) const {
assert(0 && "What is the frame register");
@@ -146,13 +141,3 @@ unsigned SystemZRegisterInfo::getEHHandlerRegister() const {
assert(0 && "What is the exception handler register");
return 0;
}
-
-int SystemZRegisterInfo::getDwarfRegNum(unsigned RegNum, bool isEH) const {
- assert(0 && "What is the dwarf register number");
- return -1;
-}
-
-int SystemZRegisterInfo::getLLVMRegNum(unsigned DwarfRegNo, bool isEH) const {
- assert(0 && "What is the dwarf register number");
- return -1;
-}
diff --git a/contrib/llvm/lib/Target/SystemZ/SystemZRegisterInfo.h b/contrib/llvm/lib/Target/SystemZ/SystemZRegisterInfo.h
index 2e262e1..03935b2 100644
--- a/contrib/llvm/lib/Target/SystemZ/SystemZRegisterInfo.h
+++ b/contrib/llvm/lib/Target/SystemZ/SystemZRegisterInfo.h
@@ -48,15 +48,11 @@ struct SystemZRegisterInfo : public SystemZGenRegisterInfo {
int SPAdj, RegScavenger *RS = NULL) const;
// Debug information queries.
- unsigned getRARegister() const;
unsigned getFrameRegister(const MachineFunction &MF) const;
// Exception handling queries.
unsigned getEHExceptionRegister() const;
unsigned getEHHandlerRegister() const;
-
- int getDwarfRegNum(unsigned RegNum, bool isEH) const;
- int getLLVMRegNum(unsigned RegNum, bool isEH) const;
};
} // end namespace llvm
diff --git a/contrib/llvm/lib/Target/SystemZ/SystemZSubtarget.cpp b/contrib/llvm/lib/Target/SystemZ/SystemZSubtarget.cpp
index b3ed066..0845510 100644
--- a/contrib/llvm/lib/Target/SystemZ/SystemZSubtarget.cpp
+++ b/contrib/llvm/lib/Target/SystemZ/SystemZSubtarget.cpp
@@ -15,7 +15,7 @@
#include "SystemZ.h"
#include "llvm/GlobalValue.h"
#include "llvm/Target/TargetMachine.h"
-#include "llvm/Target/TargetRegistry.h"
+#include "llvm/Support/TargetRegistry.h"
#define GET_SUBTARGETINFO_TARGET_DESC
#define GET_SUBTARGETINFO_CTOR
diff --git a/contrib/llvm/lib/Target/SystemZ/SystemZTargetMachine.cpp b/contrib/llvm/lib/Target/SystemZ/SystemZTargetMachine.cpp
index 48298cc..e390f06 100644
--- a/contrib/llvm/lib/Target/SystemZ/SystemZTargetMachine.cpp
+++ b/contrib/llvm/lib/Target/SystemZ/SystemZTargetMachine.cpp
@@ -10,7 +10,7 @@
#include "SystemZTargetMachine.h"
#include "SystemZ.h"
#include "llvm/PassManager.h"
-#include "llvm/Target/TargetRegistry.h"
+#include "llvm/Support/TargetRegistry.h"
using namespace llvm;
extern "C" void LLVMInitializeSystemZTarget() {
@@ -21,18 +21,15 @@ extern "C" void LLVMInitializeSystemZTarget() {
/// SystemZTargetMachine ctor - Create an ILP64 architecture model
///
SystemZTargetMachine::SystemZTargetMachine(const Target &T,
- const std::string &TT,
- const std::string &CPU,
- const std::string &FS)
- : LLVMTargetMachine(T, TT, CPU, FS),
+ StringRef TT, StringRef CPU,
+ StringRef FS, Reloc::Model RM,
+ CodeModel::Model CM)
+ : LLVMTargetMachine(T, TT, CPU, FS, RM, CM),
Subtarget(TT, CPU, FS),
DataLayout("E-p:64:64:64-i8:8:16-i16:16:16-i32:32:32-i64:64:64-f32:32:32"
"-f64:64:64-f128:128:128-a0:16:16-n32:64"),
InstrInfo(*this), TLInfo(*this), TSInfo(*this),
FrameLowering(Subtarget) {
-
- if (getRelocationModel() == Reloc::Default)
- setRelocationModel(Reloc::Static);
}
bool SystemZTargetMachine::addInstSelector(PassManagerBase &PM,
diff --git a/contrib/llvm/lib/Target/SystemZ/SystemZTargetMachine.h b/contrib/llvm/lib/Target/SystemZ/SystemZTargetMachine.h
index e40b556..43dce4b 100644
--- a/contrib/llvm/lib/Target/SystemZ/SystemZTargetMachine.h
+++ b/contrib/llvm/lib/Target/SystemZ/SystemZTargetMachine.h
@@ -37,8 +37,9 @@ class SystemZTargetMachine : public LLVMTargetMachine {
SystemZSelectionDAGInfo TSInfo;
SystemZFrameLowering FrameLowering;
public:
- SystemZTargetMachine(const Target &T, const std::string &TT,
- const std::string &CPU, const std::string &FS);
+ SystemZTargetMachine(const Target &T, StringRef TT,
+ StringRef CPU, StringRef FS,
+ Reloc::Model RM, CodeModel::Model CM);
virtual const TargetFrameLowering *getFrameLowering() const {
return &FrameLowering;
diff --git a/contrib/llvm/lib/Target/SystemZ/TargetInfo/SystemZTargetInfo.cpp b/contrib/llvm/lib/Target/SystemZ/TargetInfo/SystemZTargetInfo.cpp
index 8272b11..da99282 100644
--- a/contrib/llvm/lib/Target/SystemZ/TargetInfo/SystemZTargetInfo.cpp
+++ b/contrib/llvm/lib/Target/SystemZ/TargetInfo/SystemZTargetInfo.cpp
@@ -9,7 +9,7 @@
#include "SystemZ.h"
#include "llvm/Module.h"
-#include "llvm/Target/TargetRegistry.h"
+#include "llvm/Support/TargetRegistry.h"
using namespace llvm;
Target llvm::TheSystemZTarget;
diff --git a/contrib/llvm/lib/Target/Target.cpp b/contrib/llvm/lib/Target/Target.cpp
index a42ce54..a2b83bc 100644
--- a/contrib/llvm/lib/Target/Target.cpp
+++ b/contrib/llvm/lib/Target/Target.cpp
@@ -17,6 +17,7 @@
#include "llvm/InitializePasses.h"
#include "llvm/PassManager.h"
#include "llvm/Target/TargetData.h"
+#include "llvm/Target/TargetLibraryInfo.h"
#include "llvm/LLVMContext.h"
#include <cstring>
@@ -39,6 +40,11 @@ void LLVMAddTargetData(LLVMTargetDataRef TD, LLVMPassManagerRef PM) {
unwrap(PM)->add(new TargetData(*unwrap(TD)));
}
+void LLVMAddTargetLibraryInfo(LLVMTargetLibraryInfoRef TLI,
+ LLVMPassManagerRef PM) {
+ unwrap(PM)->add(new TargetLibraryInfo(*unwrap(TLI)));
+}
+
char *LLVMCopyStringRepOfTargetData(LLVMTargetDataRef TD) {
std::string StringRep = unwrap(TD)->getStringRepresentation();
return strdup(StringRep.c_str());
@@ -87,13 +93,13 @@ unsigned LLVMPreferredAlignmentOfGlobal(LLVMTargetDataRef TD,
unsigned LLVMElementAtOffset(LLVMTargetDataRef TD, LLVMTypeRef StructTy,
unsigned long long Offset) {
- const StructType *STy = unwrap<StructType>(StructTy);
+ StructType *STy = unwrap<StructType>(StructTy);
return unwrap(TD)->getStructLayout(STy)->getElementContainingOffset(Offset);
}
unsigned long long LLVMOffsetOfElement(LLVMTargetDataRef TD, LLVMTypeRef StructTy,
unsigned Element) {
- const StructType *STy = unwrap<StructType>(StructTy);
+ StructType *STy = unwrap<StructType>(StructTy);
return unwrap(TD)->getStructLayout(STy)->getElementOffset(Element);
}
diff --git a/contrib/llvm/lib/Target/TargetAsmInfo.cpp b/contrib/llvm/lib/Target/TargetAsmInfo.cpp
deleted file mode 100644
index a97b0e8..0000000
--- a/contrib/llvm/lib/Target/TargetAsmInfo.cpp
+++ /dev/null
@@ -1,23 +0,0 @@
-//===-- llvm/Target/TargetAsmInfo.cpp - Target Assembly Info --------------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-
-#include "llvm/Target/TargetAsmInfo.h"
-#include "llvm/Target/TargetData.h"
-#include "llvm/Target/TargetLowering.h"
-#include "llvm/Target/TargetLoweringObjectFile.h"
-#include "llvm/Target/TargetMachine.h"
-#include "llvm/Target/TargetRegisterInfo.h"
-using namespace llvm;
-
-TargetAsmInfo::TargetAsmInfo(const TargetMachine &TM) {
- TLOF = &TM.getTargetLowering()->getObjFileLowering();
- TFI = TM.getFrameLowering();
- TRI = TM.getRegisterInfo();
- TFI->getInitialFrameState(InitialFrameState);
-}
diff --git a/contrib/llvm/lib/Target/TargetAsmLexer.cpp b/contrib/llvm/lib/Target/TargetAsmLexer.cpp
deleted file mode 100644
index d4893ff..0000000
--- a/contrib/llvm/lib/Target/TargetAsmLexer.cpp
+++ /dev/null
@@ -1,14 +0,0 @@
-//===-- llvm/Target/TargetAsmLexer.cpp - Target Assembly Lexer ------------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-
-#include "llvm/Target/TargetAsmLexer.h"
-using namespace llvm;
-
-TargetAsmLexer::TargetAsmLexer(const Target &T) : TheTarget(T), Lexer(NULL) {}
-TargetAsmLexer::~TargetAsmLexer() {}
diff --git a/contrib/llvm/lib/Target/TargetData.cpp b/contrib/llvm/lib/Target/TargetData.cpp
index 17d022a..bd6a6b6 100644
--- a/contrib/llvm/lib/Target/TargetData.cpp
+++ b/contrib/llvm/lib/Target/TargetData.cpp
@@ -41,7 +41,7 @@ char TargetData::ID = 0;
// Support for StructLayout
//===----------------------------------------------------------------------===//
-StructLayout::StructLayout(const StructType *ST, const TargetData &TD) {
+StructLayout::StructLayout(StructType *ST, const TargetData &TD) {
assert(!ST->isOpaque() && "Cannot get layout of opaque structs");
StructAlignment = 0;
StructSize = 0;
@@ -49,7 +49,7 @@ StructLayout::StructLayout(const StructType *ST, const TargetData &TD) {
// Loop over each of the elements, placing them in memory.
for (unsigned i = 0, e = NumElements; i != e; ++i) {
- const Type *Ty = ST->getElementType(i);
+ Type *Ty = ST->getElementType(i);
unsigned TyAlign = ST->isPacked() ? 1 : TD.getABITypeAlignment(Ty);
// Add padding if necessary to align the data element properly.
@@ -139,6 +139,7 @@ void TargetData::init(StringRef Desc) {
PointerMemSize = 8;
PointerABIAlign = 8;
PointerPrefAlign = PointerABIAlign;
+ StackNaturalAlign = 0;
// Default alignments
setAlignment(INTEGER_ALIGN, 1, 1, 1); // i1
@@ -218,7 +219,12 @@ void TargetData::init(StringRef Desc) {
Token = Split.second;
} while (!Specifier.empty() || !Token.empty());
break;
-
+ case 'S': // Stack natural alignment.
+ StackNaturalAlign = getInt(Specifier.substr(1));
+ StackNaturalAlign /= 8;
+ // FIXME: Should we really be truncating these alingments and
+ // sizes silently?
+ break;
default:
break;
}
@@ -261,7 +267,7 @@ TargetData::setAlignment(AlignTypeEnum align_type, unsigned abi_align,
/// preferred if ABIInfo = false) the target wants for the specified datatype.
unsigned TargetData::getAlignmentInfo(AlignTypeEnum AlignType,
uint32_t BitWidth, bool ABIInfo,
- const Type *Ty) const {
+ Type *Ty) const {
// Check to see if we have an exact match and remember the best match we see.
int BestMatchIdx = -1;
int LargestInt = -1;
@@ -315,7 +321,7 @@ unsigned TargetData::getAlignmentInfo(AlignTypeEnum AlignType,
namespace {
class StructLayoutMap {
- typedef DenseMap<const StructType*, StructLayout*> LayoutInfoTy;
+ typedef DenseMap<StructType*, StructLayout*> LayoutInfoTy;
LayoutInfoTy LayoutInfo;
public:
@@ -329,7 +335,7 @@ public:
}
}
- StructLayout *&operator[](const StructType *STy) {
+ StructLayout *&operator[](StructType *STy) {
return LayoutInfo[STy];
}
@@ -343,7 +349,7 @@ TargetData::~TargetData() {
delete static_cast<StructLayoutMap*>(LayoutMap);
}
-const StructLayout *TargetData::getStructLayout(const StructType *Ty) const {
+const StructLayout *TargetData::getStructLayout(StructType *Ty) const {
if (!LayoutMap)
LayoutMap = new StructLayoutMap();
@@ -372,7 +378,9 @@ std::string TargetData::getStringRepresentation() const {
OS << (LittleEndian ? "e" : "E")
<< "-p:" << PointerMemSize*8 << ':' << PointerABIAlign*8
- << ':' << PointerPrefAlign*8;
+ << ':' << PointerPrefAlign*8
+ << "-S" << StackNaturalAlign*8;
+
for (unsigned i = 0, e = Alignments.size(); i != e; ++i) {
const TargetAlignElem &AI = Alignments[i];
OS << '-' << (char)AI.AlignType << AI.TypeBitWidth << ':'
@@ -389,14 +397,14 @@ std::string TargetData::getStringRepresentation() const {
}
-uint64_t TargetData::getTypeSizeInBits(const Type *Ty) const {
+uint64_t TargetData::getTypeSizeInBits(Type *Ty) const {
assert(Ty->isSized() && "Cannot getTypeInfo() on a type that is unsized!");
switch (Ty->getTypeID()) {
case Type::LabelTyID:
case Type::PointerTyID:
return getPointerSizeInBits();
case Type::ArrayTyID: {
- const ArrayType *ATy = cast<ArrayType>(Ty);
+ ArrayType *ATy = cast<ArrayType>(Ty);
return getTypeAllocSizeInBits(ATy->getElementType())*ATy->getNumElements();
}
case Type::StructTyID:
@@ -435,7 +443,7 @@ uint64_t TargetData::getTypeSizeInBits(const Type *Ty) const {
Get the ABI (\a abi_or_pref == true) or preferred alignment (\a abi_or_pref
== false) for the requested type \a Ty.
*/
-unsigned TargetData::getAlignment(const Type *Ty, bool abi_or_pref) const {
+unsigned TargetData::getAlignment(Type *Ty, bool abi_or_pref) const {
int AlignType = -1;
assert(Ty->isSized() && "Cannot getTypeInfo() on a type that is unsized!");
@@ -485,7 +493,7 @@ unsigned TargetData::getAlignment(const Type *Ty, bool abi_or_pref) const {
abi_or_pref, Ty);
}
-unsigned TargetData::getABITypeAlignment(const Type *Ty) const {
+unsigned TargetData::getABITypeAlignment(Type *Ty) const {
return getAlignment(Ty, true);
}
@@ -496,7 +504,7 @@ unsigned TargetData::getABIIntegerTypeAlignment(unsigned BitWidth) const {
}
-unsigned TargetData::getCallFrameTypeAlignment(const Type *Ty) const {
+unsigned TargetData::getCallFrameTypeAlignment(Type *Ty) const {
for (unsigned i = 0, e = Alignments.size(); i != e; ++i)
if (Alignments[i].AlignType == STACK_ALIGN)
return Alignments[i].ABIAlign;
@@ -504,11 +512,11 @@ unsigned TargetData::getCallFrameTypeAlignment(const Type *Ty) const {
return getABITypeAlignment(Ty);
}
-unsigned TargetData::getPrefTypeAlignment(const Type *Ty) const {
+unsigned TargetData::getPrefTypeAlignment(Type *Ty) const {
return getAlignment(Ty, false);
}
-unsigned TargetData::getPreferredTypeAlignmentShift(const Type *Ty) const {
+unsigned TargetData::getPreferredTypeAlignmentShift(Type *Ty) const {
unsigned Align = getPrefTypeAlignment(Ty);
assert(!(Align & (Align-1)) && "Alignment is not a power of two!");
return Log2_32(Align);
@@ -521,16 +529,17 @@ IntegerType *TargetData::getIntPtrType(LLVMContext &C) const {
}
-uint64_t TargetData::getIndexedOffset(const Type *ptrTy, Value* const* Indices,
- unsigned NumIndices) const {
- const Type *Ty = ptrTy;
+uint64_t TargetData::getIndexedOffset(Type *ptrTy,
+ ArrayRef<Value *> Indices) const {
+ Type *Ty = ptrTy;
assert(Ty->isPointerTy() && "Illegal argument for getIndexedOffset()");
uint64_t Result = 0;
generic_gep_type_iterator<Value* const*>
- TI = gep_type_begin(ptrTy, Indices, Indices+NumIndices);
- for (unsigned CurIDX = 0; CurIDX != NumIndices; ++CurIDX, ++TI) {
- if (const StructType *STy = dyn_cast<StructType>(*TI)) {
+ TI = gep_type_begin(ptrTy, Indices);
+ for (unsigned CurIDX = 0, EndIDX = Indices.size(); CurIDX != EndIDX;
+ ++CurIDX, ++TI) {
+ if (StructType *STy = dyn_cast<StructType>(*TI)) {
assert(Indices[CurIDX]->getType() ==
Type::getInt32Ty(ptrTy->getContext()) &&
"Illegal struct idx");
@@ -561,7 +570,7 @@ uint64_t TargetData::getIndexedOffset(const Type *ptrTy, Value* const* Indices,
/// global. This includes an explicitly requested alignment (if the global
/// has one).
unsigned TargetData::getPreferredAlignment(const GlobalVariable *GV) const {
- const Type *ElemType = GV->getType()->getElementType();
+ Type *ElemType = GV->getType()->getElementType();
unsigned Alignment = getPrefTypeAlignment(ElemType);
unsigned GVAlignment = GV->getAlignment();
if (GVAlignment >= Alignment) {
diff --git a/contrib/llvm/lib/Target/TargetFrameLowering.cpp b/contrib/llvm/lib/Target/TargetFrameLowering.cpp
index 19fd581..122f869 100644
--- a/contrib/llvm/lib/Target/TargetFrameLowering.cpp
+++ b/contrib/llvm/lib/Target/TargetFrameLowering.cpp
@@ -23,14 +23,6 @@ using namespace llvm;
TargetFrameLowering::~TargetFrameLowering() {
}
-/// getInitialFrameState - Returns a list of machine moves that are assumed
-/// on entry to a function.
-void
-TargetFrameLowering::getInitialFrameState(std::vector<MachineMove> &Moves)
- const {
- // Default is to do nothing.
-}
-
/// getFrameIndexOffset - Returns the displacement from the frame register to
/// the stack frame of the specified index. This is the default implementation
/// which is overridden for some targets.
diff --git a/contrib/llvm/lib/Target/TargetLoweringObjectFile.cpp b/contrib/llvm/lib/Target/TargetLoweringObjectFile.cpp
index 703431b..56b7b69 100644
--- a/contrib/llvm/lib/Target/TargetLoweringObjectFile.cpp
+++ b/contrib/llvm/lib/Target/TargetLoweringObjectFile.cpp
@@ -35,34 +35,16 @@ using namespace llvm;
// Generic Code
//===----------------------------------------------------------------------===//
-TargetLoweringObjectFile::TargetLoweringObjectFile() :
- Ctx(0),
- TextSection(0),
- DataSection(0),
- BSSSection(0),
- ReadOnlySection(0),
- StaticCtorSection(0),
- StaticDtorSection(0),
- LSDASection(0),
- CompactUnwindSection(0),
- DwarfAbbrevSection(0),
- DwarfInfoSection(0),
- DwarfLineSection(0),
- DwarfFrameSection(0),
- DwarfPubNamesSection(0),
- DwarfPubTypesSection(0),
- DwarfDebugInlineSection(0),
- DwarfStrSection(0),
- DwarfLocSection(0),
- DwarfARangesSection(0),
- DwarfRangesSection(0),
- DwarfMacroInfoSection(0),
- TLSExtraDataSection(0),
- CommDirectiveSupportsAlignment(true),
- SupportsWeakOmittedEHFrame(true),
- IsFunctionEHFrameSymbolPrivate(true) {
+/// Initialize - this method must be called before any actual lowering is
+/// done. This specifies the current context for codegen, and gives the
+/// lowering implementations a chance to set up their default sections.
+void TargetLoweringObjectFile::Initialize(MCContext &ctx,
+ const TargetMachine &TM) {
+ Ctx = &ctx;
+ InitMCObjectFileInfo(TM.getTargetTriple(),
+ TM.getRelocationModel(), TM.getCodeModel(), *Ctx);
}
-
+
TargetLoweringObjectFile::~TargetLoweringObjectFile() {
}
@@ -93,7 +75,7 @@ static bool isSuitableForBSS(const GlobalVariable *GV) {
/// known to have a type that is an array of 1/2/4 byte elements) ends with a
/// nul value and contains no other nuls in it.
static bool IsNullTerminatedString(const Constant *C) {
- const ArrayType *ATy = cast<ArrayType>(C->getType());
+ ArrayType *ATy = cast<ArrayType>(C->getType());
// First check: is we have constant array of i8 terminated with zero
if (const ConstantArray *CVA = dyn_cast<ConstantArray>(C)) {
@@ -188,8 +170,8 @@ SectionKind TargetLoweringObjectFile::getKindForGlobal(const GlobalValue *GV,
// If initializer is a null-terminated string, put it in a "cstring"
// section of the right width.
- if (const ArrayType *ATy = dyn_cast<ArrayType>(C->getType())) {
- if (const IntegerType *ITy =
+ if (ArrayType *ATy = dyn_cast<ArrayType>(C->getType())) {
+ if (IntegerType *ITy =
dyn_cast<IntegerType>(ATy->getElementType())) {
if ((ITy->getBitWidth() == 8 || ITy->getBitWidth() == 16 ||
ITy->getBitWidth() == 32) &&
@@ -341,20 +323,3 @@ getExprForDwarfReference(const MCSymbol *Sym, unsigned Encoding,
}
}
}
-
-unsigned TargetLoweringObjectFile::getPersonalityEncoding() const {
- return dwarf::DW_EH_PE_absptr;
-}
-
-unsigned TargetLoweringObjectFile::getLSDAEncoding() const {
- return dwarf::DW_EH_PE_absptr;
-}
-
-unsigned TargetLoweringObjectFile::getFDEEncoding(bool CFI) const {
- return dwarf::DW_EH_PE_absptr;
-}
-
-unsigned TargetLoweringObjectFile::getTTypeEncoding() const {
- return dwarf::DW_EH_PE_absptr;
-}
-
diff --git a/contrib/llvm/lib/Target/TargetMachine.cpp b/contrib/llvm/lib/Target/TargetMachine.cpp
index 74a1f4e..fe8a7ce 100644
--- a/contrib/llvm/lib/Target/TargetMachine.cpp
+++ b/contrib/llvm/lib/Target/TargetMachine.cpp
@@ -40,8 +40,6 @@ namespace llvm {
bool JITExceptionHandling;
bool JITEmitDebugInfo;
bool JITEmitDebugInfoToDisk;
- Reloc::Model RelocationModel;
- CodeModel::Model CMModel;
bool GuaranteedTailCallOpt;
unsigned StackAlignmentOverride;
bool RealignStack;
@@ -49,6 +47,7 @@ namespace llvm {
bool StrongPHIElim;
bool HasDivModLibcall;
bool AsmVerbosityDefault(false);
+ bool EnableSegmentedStacks;
}
static cl::opt<bool, true>
@@ -143,38 +142,6 @@ EmitJitDebugInfoToDisk("jit-emit-debug-to-disk",
cl::location(JITEmitDebugInfoToDisk),
cl::init(false));
-static cl::opt<llvm::Reloc::Model, true>
-DefRelocationModel("relocation-model",
- cl::desc("Choose relocation model"),
- cl::location(RelocationModel),
- cl::init(Reloc::Default),
- cl::values(
- clEnumValN(Reloc::Default, "default",
- "Target default relocation model"),
- clEnumValN(Reloc::Static, "static",
- "Non-relocatable code"),
- clEnumValN(Reloc::PIC_, "pic",
- "Fully relocatable, position independent code"),
- clEnumValN(Reloc::DynamicNoPIC, "dynamic-no-pic",
- "Relocatable external references, non-relocatable code"),
- clEnumValEnd));
-static cl::opt<llvm::CodeModel::Model, true>
-DefCodeModel("code-model",
- cl::desc("Choose code model"),
- cl::location(CMModel),
- cl::init(CodeModel::Default),
- cl::values(
- clEnumValN(CodeModel::Default, "default",
- "Target default code model"),
- clEnumValN(CodeModel::Small, "small",
- "Small code model"),
- clEnumValN(CodeModel::Kernel, "kernel",
- "Kernel code model"),
- clEnumValN(CodeModel::Medium, "medium",
- "Medium code model"),
- clEnumValN(CodeModel::Large, "large",
- "Large code model"),
- clEnumValEnd));
static cl::opt<bool, true>
EnableGuaranteedTailCallOpt("tailcallopt",
cl::desc("Turn fastcc calls into tail calls by (potentially) changing ABI."),
@@ -212,13 +179,20 @@ static cl::opt<bool>
FunctionSections("ffunction-sections",
cl::desc("Emit functions into separate sections"),
cl::init(false));
+static cl::opt<bool, true>
+SegmentedStacks("segmented-stacks",
+ cl::desc("Use segmented stacks if possible."),
+ cl::location(EnableSegmentedStacks),
+ cl::init(false));
+
//---------------------------------------------------------------------------
// TargetMachine Class
//
TargetMachine::TargetMachine(const Target &T,
StringRef TT, StringRef CPU, StringRef FS)
- : TheTarget(T), TargetTriple(TT), TargetCPU(CPU), TargetFS(FS), AsmInfo(0),
+ : TheTarget(T), TargetTriple(TT), TargetCPU(CPU), TargetFS(FS),
+ CodeGenInfo(0), AsmInfo(0),
MCRelaxAll(false),
MCNoExecStack(false),
MCSaveTempLabels(false),
@@ -231,29 +205,24 @@ TargetMachine::TargetMachine(const Target &T,
}
TargetMachine::~TargetMachine() {
+ delete CodeGenInfo;
delete AsmInfo;
}
/// getRelocationModel - Returns the code generation relocation model. The
/// choices are static, PIC, and dynamic-no-pic, and target default.
-Reloc::Model TargetMachine::getRelocationModel() {
- return RelocationModel;
-}
-
-/// setRelocationModel - Sets the code generation relocation model.
-void TargetMachine::setRelocationModel(Reloc::Model Model) {
- RelocationModel = Model;
+Reloc::Model TargetMachine::getRelocationModel() const {
+ if (!CodeGenInfo)
+ return Reloc::Default;
+ return CodeGenInfo->getRelocationModel();
}
/// getCodeModel - Returns the code model. The choices are small, kernel,
/// medium, large, and target default.
-CodeModel::Model TargetMachine::getCodeModel() {
- return CMModel;
-}
-
-/// setCodeModel - Sets the code model.
-void TargetMachine::setCodeModel(CodeModel::Model Model) {
- CMModel = Model;
+CodeModel::Model TargetMachine::getCodeModel() const {
+ if (!CodeGenInfo)
+ return CodeModel::Default;
+ return CodeGenInfo->getCodeModel();
}
bool TargetMachine::getAsmVerbosityDefault() {
diff --git a/contrib/llvm/lib/Target/TargetRegisterInfo.cpp b/contrib/llvm/lib/Target/TargetRegisterInfo.cpp
index 90a8f8d..67239b8 100644
--- a/contrib/llvm/lib/Target/TargetRegisterInfo.cpp
+++ b/contrib/llvm/lib/Target/TargetRegisterInfo.cpp
@@ -98,44 +98,25 @@ BitVector TargetRegisterInfo::getAllocatableSet(const MachineFunction &MF,
}
const TargetRegisterClass *
-llvm::getCommonSubClass(const TargetRegisterClass *A,
- const TargetRegisterClass *B) {
- // First take care of the trivial cases
+TargetRegisterInfo::getCommonSubClass(const TargetRegisterClass *A,
+ const TargetRegisterClass *B) const {
+ // First take care of the trivial cases.
if (A == B)
return A;
if (!A || !B)
return 0;
- // If B is a subclass of A, it will be handled in the loop below
- if (B->hasSubClass(A))
- return A;
+ // Register classes are ordered topologically, so the largest common
+ // sub-class it the common sub-class with the smallest ID.
+ const unsigned *SubA = A->getSubClassMask();
+ const unsigned *SubB = B->getSubClassMask();
- const TargetRegisterClass *Best = 0;
- for (TargetRegisterClass::sc_iterator I = A->subclasses_begin();
- const TargetRegisterClass *X = *I; ++I) {
- if (X == B)
- return B; // B is a subclass of A
-
- // X must be a common subclass of A and B
- if (!B->hasSubClass(X))
- continue;
-
- // A superclass is definitely better.
- if (!Best || Best->hasSuperClass(X)) {
- Best = X;
- continue;
- }
-
- // A subclass is definitely worse
- if (Best->hasSubClass(X))
- continue;
-
- // Best and *I have no super/sub class relation - pick the larger class, or
- // the smaller spill size.
- int nb = std::distance(Best->begin(), Best->end());
- int ni = std::distance(X->begin(), X->end());
- if (ni>nb || (ni==nb && X->getSize() < Best->getSize()))
- Best = X;
- }
- return Best;
+ // We could start the search from max(A.ID, B.ID), but we are only going to
+ // execute 2-3 iterations anyway.
+ for (unsigned Base = 0, BaseE = getNumRegClasses(); Base < BaseE; Base += 32)
+ if (unsigned Common = *SubA++ & *SubB++)
+ return getRegClass(Base + CountTrailingZeros_32(Common));
+
+ // No common sub-class exists.
+ return NULL;
}
diff --git a/contrib/llvm/lib/Target/X86/AsmParser/X86AsmLexer.cpp b/contrib/llvm/lib/Target/X86/AsmParser/X86AsmLexer.cpp
index ec73087..1eaccff 100644
--- a/contrib/llvm/lib/Target/X86/AsmParser/X86AsmLexer.cpp
+++ b/contrib/llvm/lib/Target/X86/AsmParser/X86AsmLexer.cpp
@@ -7,20 +7,20 @@
//
//===----------------------------------------------------------------------===//
-#include "llvm/ADT/SmallVector.h"
-#include "llvm/ADT/StringExtras.h"
-#include "llvm/Target/TargetAsmLexer.h"
-#include "llvm/Target/TargetRegistry.h"
+#include "MCTargetDesc/X86BaseInfo.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCParser/MCAsmLexer.h"
#include "llvm/MC/MCParser/MCParsedAsmOperand.h"
-#include "X86.h"
+#include "llvm/MC/MCTargetAsmLexer.h"
+#include "llvm/Support/TargetRegistry.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/StringExtras.h"
using namespace llvm;
namespace {
-class X86AsmLexer : public TargetAsmLexer {
+class X86AsmLexer : public MCTargetAsmLexer {
const MCAsmInfo &AsmInfo;
bool tentativeIsValid;
@@ -60,8 +60,8 @@ protected:
}
}
public:
- X86AsmLexer(const Target &T, const MCAsmInfo &MAI)
- : TargetAsmLexer(T), AsmInfo(MAI), tentativeIsValid(false) {
+ X86AsmLexer(const Target &T, const MCRegisterInfo &MRI, const MCAsmInfo &MAI)
+ : MCTargetAsmLexer(T), AsmInfo(MAI), tentativeIsValid(false) {
}
};
@@ -160,6 +160,6 @@ AsmToken X86AsmLexer::LexTokenIntel() {
}
extern "C" void LLVMInitializeX86AsmLexer() {
- RegisterAsmLexer<X86AsmLexer> X(TheX86_32Target);
- RegisterAsmLexer<X86AsmLexer> Y(TheX86_64Target);
+ RegisterMCAsmLexer<X86AsmLexer> X(TheX86_32Target);
+ RegisterMCAsmLexer<X86AsmLexer> Y(TheX86_64Target);
}
diff --git a/contrib/llvm/lib/Target/X86/AsmParser/X86AsmParser.cpp b/contrib/llvm/lib/Target/X86/AsmParser/X86AsmParser.cpp
index d45dd35..cb4f15f 100644
--- a/contrib/llvm/lib/Target/X86/AsmParser/X86AsmParser.cpp
+++ b/contrib/llvm/lib/Target/X86/AsmParser/X86AsmParser.cpp
@@ -7,14 +7,12 @@
//
//===----------------------------------------------------------------------===//
-#include "llvm/Target/TargetAsmParser.h"
-#include "X86.h"
-#include "X86Subtarget.h"
-#include "llvm/Target/TargetRegistry.h"
-#include "llvm/Target/TargetAsmParser.h"
+#include "MCTargetDesc/X86BaseInfo.h"
+#include "llvm/MC/MCTargetAsmParser.h"
#include "llvm/MC/MCStreamer.h"
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCInst.h"
+#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/MC/MCSubtargetInfo.h"
#include "llvm/MC/MCParser/MCAsmLexer.h"
#include "llvm/MC/MCParser/MCAsmParser.h"
@@ -26,6 +24,7 @@
#include "llvm/ADT/StringSwitch.h"
#include "llvm/ADT/Twine.h"
#include "llvm/Support/SourceMgr.h"
+#include "llvm/Support/TargetRegistry.h"
#include "llvm/Support/raw_ostream.h"
using namespace llvm;
@@ -33,7 +32,7 @@ using namespace llvm;
namespace {
struct X86Operand;
-class X86ATTAsmParser : public TargetAsmParser {
+class X86ATTAsmParser : public MCTargetAsmParser {
MCSubtargetInfo &STI;
MCAsmParser &Parser;
@@ -48,6 +47,7 @@ private:
X86Operand *ParseMemOperand(unsigned SegReg, SMLoc StartLoc);
bool ParseDirectiveWord(unsigned Size, SMLoc L);
+ bool ParseDirectiveCode(StringRef IDVal, SMLoc L);
bool MatchAndEmitInstruction(SMLoc IDLoc,
SmallVectorImpl<MCParsedAsmOperand*> &Operands,
@@ -65,6 +65,10 @@ private:
// FIXME: Can tablegen auto-generate this?
return (STI.getFeatureBits() & X86::Mode64Bit) != 0;
}
+ void SwitchMode() {
+ unsigned FB = ComputeAvailableFeatures(STI.ToggleFeature(X86::Mode64Bit));
+ setAvailableFeatures(FB);
+ }
/// @name Auto-generated Matcher Functions
/// {
@@ -76,7 +80,7 @@ private:
public:
X86ATTAsmParser(MCSubtargetInfo &sti, MCAsmParser &parser)
- : TargetAsmParser(), STI(sti), Parser(parser) {
+ : MCTargetAsmParser(), STI(sti), Parser(parser) {
// Initialize the set of available features.
setAvailableFeatures(ComputeAvailableFeatures(STI.getFeatureBits()));
@@ -223,6 +227,21 @@ struct X86Operand : public MCParsedAsmOperand {
(0x00000000FFFFFF80ULL <= Value && Value <= 0x00000000FFFFFFFFULL)||
(0xFFFFFFFFFFFFFF80ULL <= Value && Value <= 0xFFFFFFFFFFFFFFFFULL));
}
+ bool isImmZExtu32u8() const {
+ if (!isImm())
+ return false;
+
+ // If this isn't a constant expr, just assume it fits and let relaxation
+ // handle it.
+ const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
+ if (!CE)
+ return true;
+
+ // Otherwise, check the value is in a range that makes sense for this
+ // extension.
+ uint64_t Value = CE->getValue();
+ return (Value <= 0x00000000000000FFULL);
+ }
bool isImmSExti64i8() const {
if (!isImm())
return false;
@@ -382,19 +401,25 @@ bool X86ATTAsmParser::ParseRegister(unsigned &RegNo,
if (Tok.isNot(AsmToken::Identifier))
return Error(Tok.getLoc(), "invalid register name");
- // FIXME: Validate register for the current architecture; we have to do
- // validation later, so maybe there is no need for this here.
RegNo = MatchRegisterName(Tok.getString());
// If the match failed, try the register name as lowercase.
if (RegNo == 0)
RegNo = MatchRegisterName(LowercaseString(Tok.getString()));
- // FIXME: This should be done using Requires<In32BitMode> and
- // Requires<In64BitMode> so "eiz" usage in 64-bit instructions
- // can be also checked.
- if (RegNo == X86::RIZ && !is64BitMode())
- return Error(Tok.getLoc(), "riz register in 64-bit mode only");
+ if (!is64BitMode()) {
+ // FIXME: This should be done using Requires<In32BitMode> and
+ // Requires<In64BitMode> so "eiz" usage in 64-bit instructions can be also
+ // checked.
+ // FIXME: Check AH, CH, DH, BH cannot be used in an instruction requiring a
+ // REX prefix.
+ if (RegNo == X86::RIZ ||
+ X86MCRegisterClasses[X86::GR64RegClassID].contains(RegNo) ||
+ X86II::isX86_64NonExtLowByteReg(RegNo) ||
+ X86II::isX86_64ExtendedReg(RegNo))
+ return Error(Tok.getLoc(), "register %"
+ + Tok.getString() + " is only available in 64-bit mode");
+ }
// Parse "%st" as "%st(0)" and "%st(1)", which is multiple tokens.
if (RegNo == 0 && (Tok.getString() == "st" || Tok.getString() == "ST")) {
@@ -472,7 +497,7 @@ X86Operand *X86ATTAsmParser::ParseOperand() {
SMLoc Start, End;
if (ParseRegister(RegNo, Start, End)) return 0;
if (RegNo == X86::EIZ || RegNo == X86::RIZ) {
- Error(Start, "eiz and riz can only be used as index registers");
+ Error(Start, "%eiz and %riz can only be used as index registers");
return 0;
}
@@ -956,6 +981,7 @@ MatchAndEmitInstruction(SMLoc IDLoc,
// First, try a direct match.
switch (MatchInstructionImpl(Operands, Inst, OrigErrorInfo)) {
+ default: break;
case Match_Success:
Out.EmitInstruction(Inst);
return false;
@@ -994,7 +1020,7 @@ MatchAndEmitInstruction(SMLoc IDLoc,
// Check for the various suffix matches.
Tmp[Base.size()] = Suffixes[0];
unsigned ErrorInfoIgnore;
- MatchResultTy Match1, Match2, Match3, Match4;
+ unsigned Match1, Match2, Match3, Match4;
Match1 = MatchInstructionImpl(Operands, Inst, ErrorInfoIgnore);
Tmp[Base.size()] = Suffixes[1];
@@ -1096,6 +1122,8 @@ bool X86ATTAsmParser::ParseDirective(AsmToken DirectiveID) {
StringRef IDVal = DirectiveID.getIdentifier();
if (IDVal == ".word")
return ParseDirectiveWord(2, DirectiveID.getLoc());
+ else if (IDVal.startswith(".code"))
+ return ParseDirectiveCode(IDVal, DirectiveID.getLoc());
return true;
}
@@ -1124,15 +1152,35 @@ bool X86ATTAsmParser::ParseDirectiveWord(unsigned Size, SMLoc L) {
return false;
}
+/// ParseDirectiveCode
+/// ::= .code32 | .code64
+bool X86ATTAsmParser::ParseDirectiveCode(StringRef IDVal, SMLoc L) {
+ if (IDVal == ".code32") {
+ Parser.Lex();
+ if (is64BitMode()) {
+ SwitchMode();
+ getParser().getStreamer().EmitAssemblerFlag(MCAF_Code32);
+ }
+ } else if (IDVal == ".code64") {
+ Parser.Lex();
+ if (!is64BitMode()) {
+ SwitchMode();
+ getParser().getStreamer().EmitAssemblerFlag(MCAF_Code64);
+ }
+ } else {
+ return Error(L, "unexpected directive " + IDVal);
+ }
+ return false;
+}
extern "C" void LLVMInitializeX86AsmLexer();
// Force static initialization.
extern "C" void LLVMInitializeX86AsmParser() {
- RegisterAsmParser<X86ATTAsmParser> X(TheX86_32Target);
- RegisterAsmParser<X86ATTAsmParser> Y(TheX86_64Target);
+ RegisterMCAsmParser<X86ATTAsmParser> X(TheX86_32Target);
+ RegisterMCAsmParser<X86ATTAsmParser> Y(TheX86_64Target);
LLVMInitializeX86AsmLexer();
}
diff --git a/contrib/llvm/lib/Target/X86/Disassembler/X86Disassembler.cpp b/contrib/llvm/lib/Target/X86/Disassembler/X86Disassembler.cpp
index 4a0d2ec..3aacb20 100644
--- a/contrib/llvm/lib/Target/X86/Disassembler/X86Disassembler.cpp
+++ b/contrib/llvm/lib/Target/X86/Disassembler/X86Disassembler.cpp
@@ -21,13 +21,16 @@
#include "llvm/MC/MCDisassembler.h"
#include "llvm/MC/MCDisassembler.h"
#include "llvm/MC/MCInst.h"
-#include "llvm/Target/TargetRegistry.h"
+#include "llvm/MC/MCSubtargetInfo.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/MemoryObject.h"
+#include "llvm/Support/TargetRegistry.h"
#include "llvm/Support/raw_ostream.h"
#define GET_REGINFO_ENUM
#include "X86GenRegisterInfo.inc"
+#define GET_INSTRINFO_ENUM
+#include "X86GenInstrInfo.inc"
#include "X86GenEDInfo.inc"
using namespace llvm;
@@ -64,8 +67,8 @@ extern Target TheX86_32Target, TheX86_64Target;
static bool translateInstruction(MCInst &target,
InternalInstruction &source);
-X86GenericDisassembler::X86GenericDisassembler(DisassemblerMode mode) :
- MCDisassembler(),
+X86GenericDisassembler::X86GenericDisassembler(const MCSubtargetInfo &STI, DisassemblerMode mode) :
+ MCDisassembler(STI),
fMode(mode) {
}
@@ -106,28 +109,34 @@ static void logger(void* arg, const char* log) {
// Public interface for the disassembler
//
-bool X86GenericDisassembler::getInstruction(MCInst &instr,
- uint64_t &size,
- const MemoryObject &region,
- uint64_t address,
- raw_ostream &vStream) const {
+MCDisassembler::DecodeStatus
+X86GenericDisassembler::getInstruction(MCInst &instr,
+ uint64_t &size,
+ const MemoryObject &region,
+ uint64_t address,
+ raw_ostream &vStream,
+ raw_ostream &cStream) const {
InternalInstruction internalInstr;
+
+ dlog_t loggerFn = logger;
+ if (&vStream == &nulls())
+ loggerFn = 0; // Disable logging completely if it's going to nulls().
int ret = decodeInstruction(&internalInstr,
regionReader,
(void*)&region,
- logger,
+ loggerFn,
(void*)&vStream,
address,
fMode);
if (ret) {
size = internalInstr.readerCursor - address;
- return false;
+ return Fail;
}
else {
size = internalInstr.length;
- return !translateInstruction(instr, internalInstr);
+ return (!translateInstruction(instr, internalInstr)) ? Success : Fail;
}
}
@@ -183,8 +192,46 @@ static void translateImmediate(MCInst &mcInst, uint64_t immediate,
break;
}
}
+ // By default sign-extend all X86 immediates based on their encoding.
+ else if (type == TYPE_IMM8 || type == TYPE_IMM16 || type == TYPE_IMM32 ||
+ type == TYPE_IMM64) {
+ uint32_t Opcode = mcInst.getOpcode();
+ switch (operand.encoding) {
+ default:
+ break;
+ case ENCODING_IB:
+ // Special case those X86 instructions that use the imm8 as a set of
+ // bits, bit count, etc. and are not sign-extend.
+ if (Opcode != X86::BLENDPSrri && Opcode != X86::BLENDPDrri &&
+ Opcode != X86::PBLENDWrri && Opcode != X86::MPSADBWrri &&
+ Opcode != X86::DPPSrri && Opcode != X86::DPPDrri &&
+ Opcode != X86::INSERTPSrr && Opcode != X86::VBLENDPSYrri &&
+ Opcode != X86::VBLENDPSYrmi && Opcode != X86::VBLENDPDYrri &&
+ Opcode != X86::VBLENDPDYrmi && Opcode != X86::VPBLENDWrri &&
+ Opcode != X86::VMPSADBWrri && Opcode != X86::VDPPSYrri &&
+ Opcode != X86::VDPPSYrmi && Opcode != X86::VDPPDrri &&
+ Opcode != X86::VINSERTPSrr)
+ type = TYPE_MOFFS8;
+ break;
+ case ENCODING_IW:
+ type = TYPE_MOFFS16;
+ break;
+ case ENCODING_ID:
+ type = TYPE_MOFFS32;
+ break;
+ case ENCODING_IO:
+ type = TYPE_MOFFS64;
+ break;
+ }
+ }
switch (type) {
+ case TYPE_XMM128:
+ mcInst.addOperand(MCOperand::CreateReg(X86::XMM0 + (immediate >> 4)));
+ return;
+ case TYPE_XMM256:
+ mcInst.addOperand(MCOperand::CreateReg(X86::YMM0 + (immediate >> 4)));
+ return;
case TYPE_MOFFS8:
case TYPE_REL8:
if(immediate & 0x80)
@@ -543,12 +590,12 @@ static bool translateInstruction(MCInst &mcInst,
return false;
}
-static MCDisassembler *createX86_32Disassembler(const Target &T) {
- return new X86Disassembler::X86_32Disassembler;
+static MCDisassembler *createX86_32Disassembler(const Target &T, const MCSubtargetInfo &STI) {
+ return new X86Disassembler::X86_32Disassembler(STI);
}
-static MCDisassembler *createX86_64Disassembler(const Target &T) {
- return new X86Disassembler::X86_64Disassembler;
+static MCDisassembler *createX86_64Disassembler(const Target &T, const MCSubtargetInfo &STI) {
+ return new X86Disassembler::X86_64Disassembler(STI);
}
extern "C" void LLVMInitializeX86Disassembler() {
diff --git a/contrib/llvm/lib/Target/X86/Disassembler/X86Disassembler.h b/contrib/llvm/lib/Target/X86/Disassembler/X86Disassembler.h
index 550cf9d..6ac9a0f 100644
--- a/contrib/llvm/lib/Target/X86/Disassembler/X86Disassembler.h
+++ b/contrib/llvm/lib/Target/X86/Disassembler/X86Disassembler.h
@@ -92,6 +92,7 @@ struct InternalInstruction;
namespace llvm {
class MCInst;
+class MCSubtargetInfo;
class MemoryObject;
class raw_ostream;
@@ -107,16 +108,17 @@ protected:
/// Constructor - Initializes the disassembler.
///
/// @param mode - The X86 architecture mode to decode for.
- X86GenericDisassembler(DisassemblerMode mode);
+ X86GenericDisassembler(const MCSubtargetInfo &STI, DisassemblerMode mode);
public:
~X86GenericDisassembler();
/// getInstruction - See MCDisassembler.
- bool getInstruction(MCInst &instr,
- uint64_t &size,
- const MemoryObject &region,
- uint64_t address,
- raw_ostream &vStream) const;
+ DecodeStatus getInstruction(MCInst &instr,
+ uint64_t &size,
+ const MemoryObject &region,
+ uint64_t address,
+ raw_ostream &vStream,
+ raw_ostream &cStream) const;
/// getEDInfo - See MCDisassembler.
EDInstInfo *getEDInfo() const;
@@ -127,24 +129,24 @@ private:
/// X86_16Disassembler - 16-bit X86 disassembler.
class X86_16Disassembler : public X86GenericDisassembler {
public:
- X86_16Disassembler() :
- X86GenericDisassembler(MODE_16BIT) {
+ X86_16Disassembler(const MCSubtargetInfo &STI) :
+ X86GenericDisassembler(STI, MODE_16BIT) {
}
};
/// X86_16Disassembler - 32-bit X86 disassembler.
class X86_32Disassembler : public X86GenericDisassembler {
public:
- X86_32Disassembler() :
- X86GenericDisassembler(MODE_32BIT) {
+ X86_32Disassembler(const MCSubtargetInfo &STI) :
+ X86GenericDisassembler(STI, MODE_32BIT) {
}
};
/// X86_16Disassembler - 64-bit X86 disassembler.
class X86_64Disassembler : public X86GenericDisassembler {
public:
- X86_64Disassembler() :
- X86GenericDisassembler(MODE_64BIT) {
+ X86_64Disassembler(const MCSubtargetInfo &STI) :
+ X86GenericDisassembler(STI, MODE_64BIT) {
}
};
diff --git a/contrib/llvm/lib/Target/X86/Disassembler/X86DisassemblerDecoder.c b/contrib/llvm/lib/Target/X86/Disassembler/X86DisassemblerDecoder.c
index de1610b..f9b0fe5 100644
--- a/contrib/llvm/lib/Target/X86/Disassembler/X86DisassemblerDecoder.c
+++ b/contrib/llvm/lib/Target/X86/Disassembler/X86DisassemblerDecoder.c
@@ -58,8 +58,8 @@ static InstructionContext contextForAttrs(uint8_t attrMask) {
* @return - TRUE if the ModR/M byte is required, FALSE otherwise.
*/
static int modRMRequired(OpcodeType type,
- InstructionContext insnContext,
- uint8_t opcode) {
+ InstructionContext insnContext,
+ uint8_t opcode) {
const struct ContextDecision* decision = 0;
switch (type) {
@@ -391,7 +391,7 @@ static int readPrefixes(struct InternalInstruction* insn) {
return -1;
}
- if (insn->mode == MODE_64BIT || byte1 & 0x8) {
+ if (insn->mode == MODE_64BIT || (byte1 & 0xc0) == 0xc0) {
insn->vexSize = 3;
insn->necessaryPrefixLocation = insn->readerCursor - 1;
}
@@ -406,12 +406,14 @@ static int readPrefixes(struct InternalInstruction* insn) {
consumeByte(insn, &insn->vexPrefix[2]);
/* We simulate the REX prefix for simplicity's sake */
-
- insn->rexPrefix = 0x40
- | (wFromVEX3of3(insn->vexPrefix[2]) << 3)
- | (rFromVEX2of3(insn->vexPrefix[1]) << 2)
- | (xFromVEX2of3(insn->vexPrefix[1]) << 1)
- | (bFromVEX2of3(insn->vexPrefix[1]) << 0);
+
+ if (insn->mode == MODE_64BIT) {
+ insn->rexPrefix = 0x40
+ | (wFromVEX3of3(insn->vexPrefix[2]) << 3)
+ | (rFromVEX2of3(insn->vexPrefix[1]) << 2)
+ | (xFromVEX2of3(insn->vexPrefix[1]) << 1)
+ | (bFromVEX2of3(insn->vexPrefix[1]) << 0);
+ }
switch (ppFromVEX3of3(insn->vexPrefix[2]))
{
@@ -433,7 +435,7 @@ static int readPrefixes(struct InternalInstruction* insn) {
return -1;
}
- if (insn->mode == MODE_64BIT || byte1 & 0x8) {
+ if (insn->mode == MODE_64BIT || (byte1 & 0xc0) == 0xc0) {
insn->vexSize = 2;
}
else {
@@ -444,8 +446,10 @@ static int readPrefixes(struct InternalInstruction* insn) {
insn->vexPrefix[0] = byte;
consumeByte(insn, &insn->vexPrefix[1]);
- insn->rexPrefix = 0x40
- | (rFromVEX2of2(insn->vexPrefix[1]) << 2);
+ if (insn->mode == MODE_64BIT) {
+ insn->rexPrefix = 0x40
+ | (rFromVEX2of2(insn->vexPrefix[1]) << 2);
+ }
switch (ppFromVEX2of2(insn->vexPrefix[1]))
{
@@ -700,34 +704,6 @@ static BOOL is16BitEquvalent(const char* orig, const char* equiv) {
}
/*
- * is64BitEquivalent - Determines whether two instruction names refer to
- * equivalent instructions but one is 64-bit whereas the other is not.
- *
- * @param orig - The instruction that is not 64-bit
- * @param equiv - The instruction that is 64-bit
- */
-static BOOL is64BitEquivalent(const char* orig, const char* equiv) {
- off_t i;
-
- for (i = 0;; i++) {
- if (orig[i] == '\0' && equiv[i] == '\0')
- return TRUE;
- if (orig[i] == '\0' || equiv[i] == '\0')
- return FALSE;
- if (orig[i] != equiv[i]) {
- if ((orig[i] == 'W' || orig[i] == 'L') && equiv[i] == 'Q')
- continue;
- if ((orig[i] == '1' || orig[i] == '3') && equiv[i] == '6')
- continue;
- if ((orig[i] == '6' || orig[i] == '2') && equiv[i] == '4')
- continue;
- return FALSE;
- }
- }
-}
-
-
-/*
* getID - Determines the ID of an instruction, consuming the ModR/M byte as
* appropriate for extended and escape opcodes. Determines the attributes and
* context for the instruction before doing so.
@@ -763,8 +739,6 @@ static int getID(struct InternalInstruction* insn) {
break;
}
- if (wFromVEX3of3(insn->vexPrefix[2]))
- attrMask |= ATTR_REXW;
if (lFromVEX3of3(insn->vexPrefix[2]))
attrMask |= ATTR_VEXL;
}
@@ -789,63 +763,55 @@ static int getID(struct InternalInstruction* insn) {
}
}
else {
- if (insn->rexPrefix & 0x08)
- attrMask |= ATTR_REXW;
-
if (isPrefixAtLocation(insn, 0x66, insn->necessaryPrefixLocation))
attrMask |= ATTR_OPSIZE;
else if (isPrefixAtLocation(insn, 0xf3, insn->necessaryPrefixLocation))
attrMask |= ATTR_XS;
else if (isPrefixAtLocation(insn, 0xf2, insn->necessaryPrefixLocation))
attrMask |= ATTR_XD;
-
}
+ if (insn->rexPrefix & 0x08)
+ attrMask |= ATTR_REXW;
+
if (getIDWithAttrMask(&instructionID, insn, attrMask))
return -1;
/* The following clauses compensate for limitations of the tables. */
- if ((attrMask & ATTR_XD) && (attrMask & ATTR_REXW)) {
+ if ((attrMask & ATTR_VEXL) && (attrMask & ATTR_REXW)) {
/*
- * Although for SSE instructions it is usually necessary to treat REX.W+F2
- * as F2 for decode (in the absence of a 64BIT_REXW_XD category) there is
- * an occasional instruction where F2 is incidental and REX.W is the more
- * significant. If the decoded instruction is 32-bit and adding REX.W
- * instead of F2 changes a 32 to a 64, we adopt the new encoding.
+ * Some VEX instructions ignore the L-bit, but use the W-bit. Normally L-bit
+ * has precedence since there are no L-bit with W-bit entries in the tables.
+ * So if the L-bit isn't significant we should use the W-bit instead.
*/
-
+
const struct InstructionSpecifier *spec;
- uint16_t instructionIDWithREXw;
- const struct InstructionSpecifier *specWithREXw;
-
+ uint16_t instructionIDWithWBit;
+ const struct InstructionSpecifier *specWithWBit;
+
spec = specifierForUID(instructionID);
-
- if (getIDWithAttrMask(&instructionIDWithREXw,
+
+ if (getIDWithAttrMask(&instructionIDWithWBit,
insn,
- attrMask & (~ATTR_XD))) {
- /*
- * Decoding with REX.w would yield nothing; give up and return original
- * decode.
- */
-
+ (attrMask & (~ATTR_VEXL)) | ATTR_REXW)) {
insn->instructionID = instructionID;
insn->spec = spec;
return 0;
}
-
- specWithREXw = specifierForUID(instructionIDWithREXw);
-
- if (is64BitEquivalent(spec->name, specWithREXw->name)) {
- insn->instructionID = instructionIDWithREXw;
- insn->spec = specWithREXw;
+
+ specWithWBit = specifierForUID(instructionIDWithWBit);
+
+ if (instructionID != instructionIDWithWBit) {
+ insn->instructionID = instructionIDWithWBit;
+ insn->spec = specWithWBit;
} else {
insn->instructionID = instructionID;
insn->spec = spec;
}
return 0;
}
-
+
if (insn->prefixPresent[0x66] && !(attrMask & ATTR_OPSIZE)) {
/*
* The instruction tables make no distinction between instructions that
@@ -885,6 +851,43 @@ static int getID(struct InternalInstruction* insn) {
}
return 0;
}
+
+ if (insn->opcodeType == ONEBYTE && insn->opcode == 0x90 &&
+ insn->rexPrefix & 0x01) {
+ /*
+ * NOOP shouldn't decode as NOOP if REX.b is set. Instead
+ * it should decode as XCHG %r8, %eax.
+ */
+
+ const struct InstructionSpecifier *spec;
+ uint16_t instructionIDWithNewOpcode;
+ const struct InstructionSpecifier *specWithNewOpcode;
+
+ spec = specifierForUID(instructionID);
+
+ /* Borrow opcode from one of the other XCHGar opcodes */
+ insn->opcode = 0x91;
+
+ if (getIDWithAttrMask(&instructionIDWithNewOpcode,
+ insn,
+ attrMask)) {
+ insn->opcode = 0x90;
+
+ insn->instructionID = instructionID;
+ insn->spec = spec;
+ return 0;
+ }
+
+ specWithNewOpcode = specifierForUID(instructionIDWithNewOpcode);
+
+ /* Change back */
+ insn->opcode = 0x90;
+
+ insn->instructionID = instructionIDWithNewOpcode;
+ insn->spec = specWithNewOpcode;
+
+ return 0;
+ }
insn->instructionID = instructionID;
insn->spec = specifierForUID(insn->instructionID);
@@ -1434,11 +1437,10 @@ static int readImmediate(struct InternalInstruction* insn, uint8_t size) {
}
/*
- * readVVVV - Consumes an immediate operand from an instruction, given the
- * desired operand size.
+ * readVVVV - Consumes vvvv from an instruction if it has a VEX prefix.
*
* @param insn - The instruction whose operand is to be read.
- * @return - 0 if the immediate was successfully consumed; nonzero
+ * @return - 0 if the vvvv was successfully consumed; nonzero
* otherwise.
*/
static int readVVVV(struct InternalInstruction* insn) {
@@ -1451,6 +1453,9 @@ static int readVVVV(struct InternalInstruction* insn) {
else
return -1;
+ if (insn->mode != MODE_64BIT)
+ insn->vvvv &= 0x7;
+
return 0;
}
@@ -1463,8 +1468,14 @@ static int readVVVV(struct InternalInstruction* insn) {
*/
static int readOperands(struct InternalInstruction* insn) {
int index;
+ int hasVVVV, needVVVV;
dbgprintf(insn, "readOperands()");
+
+ /* If non-zero vvvv specified, need to make sure one of the operands
+ uses it. */
+ hasVVVV = !readVVVV(insn);
+ needVVVV = hasVVVV && (insn->vvvv != 0);
for (index = 0; index < X86_MAX_OPERANDS; ++index) {
switch (insn->spec->operands[index].encoding) {
@@ -1537,7 +1548,8 @@ static int readOperands(struct InternalInstruction* insn) {
return -1;
break;
case ENCODING_VVVV:
- if (readVVVV(insn))
+ needVVVV = 0; /* Mark that we have found a VVVV operand. */
+ if (!hasVVVV)
return -1;
if (fixupReg(insn, &insn->spec->operands[index]))
return -1;
@@ -1549,6 +1561,9 @@ static int readOperands(struct InternalInstruction* insn) {
return -1;
}
}
+
+ /* If we didn't find ENCODING_VVVV operand, but non-zero vvvv present, fail */
+ if (needVVVV) return -1;
return 0;
}
diff --git a/contrib/llvm/lib/Target/X86/Disassembler/X86DisassemblerDecoderCommon.h b/contrib/llvm/lib/Target/X86/Disassembler/X86DisassemblerDecoderCommon.h
index 70315ed..8b79335 100644
--- a/contrib/llvm/lib/Target/X86/Disassembler/X86DisassemblerDecoderCommon.h
+++ b/contrib/llvm/lib/Target/X86/Disassembler/X86DisassemblerDecoderCommon.h
@@ -81,12 +81,18 @@ enum attributeBits {
"but not the operands") \
ENUM_ENTRY(IC_XS, 2, "may say something about the opcode " \
"but not the operands") \
+ ENUM_ENTRY(IC_XD_OPSIZE, 3, "requires an OPSIZE prefix, so " \
+ "operands change width") \
+ ENUM_ENTRY(IC_XS_OPSIZE, 3, "requires an OPSIZE prefix, so " \
+ "operands change width") \
ENUM_ENTRY(IC_64BIT_REXW, 4, "requires a REX.W prefix, so operands "\
"change width; overrides IC_OPSIZE") \
ENUM_ENTRY(IC_64BIT_OPSIZE, 3, "Just as meaningful as IC_OPSIZE") \
ENUM_ENTRY(IC_64BIT_XD, 5, "XD instructions are SSE; REX.W is " \
"secondary") \
ENUM_ENTRY(IC_64BIT_XS, 5, "Just as meaningful as IC_64BIT_XD") \
+ ENUM_ENTRY(IC_64BIT_XD_OPSIZE, 3, "Just as meaningful as IC_XD_OPSIZE") \
+ ENUM_ENTRY(IC_64BIT_XS_OPSIZE, 3, "Just as meaningful as IC_XS_OPSIZE") \
ENUM_ENTRY(IC_64BIT_REXW_XS, 6, "OPSIZE could mean a different " \
"opcode") \
ENUM_ENTRY(IC_64BIT_REXW_XD, 6, "Just as meaningful as " \
@@ -104,7 +110,7 @@ enum attributeBits {
ENUM_ENTRY(IC_VEX_W_OPSIZE, 4, "requires VEX, W, and OpSize") \
ENUM_ENTRY(IC_VEX_L, 3, "requires VEX and the L prefix") \
ENUM_ENTRY(IC_VEX_L_XS, 4, "requires VEX and the L and XS prefix")\
- ENUM_ENTRY(IC_VEX_L_XD, 4, "requires VEX and the L and XS prefix")\
+ ENUM_ENTRY(IC_VEX_L_XD, 4, "requires VEX and the L and XD prefix")\
ENUM_ENTRY(IC_VEX_L_OPSIZE, 4, "requires VEX, L, and OpSize")
diff --git a/contrib/llvm/lib/Target/X86/InstPrinter/CMakeLists.txt b/contrib/llvm/lib/Target/X86/InstPrinter/CMakeLists.txt
deleted file mode 100644
index 033973e..0000000
--- a/contrib/llvm/lib/Target/X86/InstPrinter/CMakeLists.txt
+++ /dev/null
@@ -1,8 +0,0 @@
-include_directories( ${CMAKE_CURRENT_BINARY_DIR}/.. ${CMAKE_CURRENT_SOURCE_DIR}/.. )
-
-add_llvm_library(LLVMX86AsmPrinter
- X86ATTInstPrinter.cpp
- X86IntelInstPrinter.cpp
- X86InstComments.cpp
- )
-add_dependencies(LLVMX86AsmPrinter X86CodeGenTable_gen)
diff --git a/contrib/llvm/lib/Target/X86/InstPrinter/Makefile b/contrib/llvm/lib/Target/X86/InstPrinter/Makefile
deleted file mode 100644
index c82aa33..0000000
--- a/contrib/llvm/lib/Target/X86/InstPrinter/Makefile
+++ /dev/null
@@ -1,15 +0,0 @@
-##===- lib/Target/X86/AsmPrinter/Makefile ------------------*- Makefile -*-===##
-#
-# The LLVM Compiler Infrastructure
-#
-# This file is distributed under the University of Illinois Open Source
-# License. See LICENSE.TXT for details.
-#
-##===----------------------------------------------------------------------===##
-LEVEL = ../../../..
-LIBRARYNAME = LLVMX86AsmPrinter
-
-# Hack: we need to include 'main' x86 target directory to grab private headers
-CPP.Flags += -I$(PROJ_OBJ_DIR)/.. -I$(PROJ_SRC_DIR)/..
-
-include $(LEVEL)/Makefile.common
diff --git a/contrib/llvm/lib/Target/X86/InstPrinter/X86ATTInstPrinter.cpp b/contrib/llvm/lib/Target/X86/InstPrinter/X86ATTInstPrinter.cpp
index c37d879..029d491 100644
--- a/contrib/llvm/lib/Target/X86/InstPrinter/X86ATTInstPrinter.cpp
+++ b/contrib/llvm/lib/Target/X86/InstPrinter/X86ATTInstPrinter.cpp
@@ -39,14 +39,17 @@ void X86ATTInstPrinter::printRegName(raw_ostream &OS,
OS << '%' << getRegisterName(RegNo);
}
-void X86ATTInstPrinter::printInst(const MCInst *MI, raw_ostream &OS) {
+void X86ATTInstPrinter::printInst(const MCInst *MI, raw_ostream &OS,
+ StringRef Annot) {
// Try to print any aliases first.
if (!printAliasInstr(MI, OS))
printInstruction(MI, OS);
// If verbose assembly is enabled, we can print some informative comments.
- if (CommentStream)
+ if (CommentStream) {
+ printAnnotation(OS, Annot);
EmitAnyX86InstComments(MI, *CommentStream, getRegisterName);
+ }
}
StringRef X86ATTInstPrinter::getOpcodeName(unsigned Opcode) const {
@@ -90,7 +93,8 @@ void X86ATTInstPrinter::printOperand(const MCInst *MI, unsigned OpNo,
if (Op.isReg()) {
O << '%' << getRegisterName(Op.getReg());
} else if (Op.isImm()) {
- O << '$' << Op.getImm();
+ // Print X86 immediates as signed values.
+ O << '$' << (int64_t)Op.getImm();
if (CommentStream && (Op.getImm() > 255 || Op.getImm() < -256))
*CommentStream << format("imm = 0x%llX\n", (long long)Op.getImm());
diff --git a/contrib/llvm/lib/Target/X86/InstPrinter/X86ATTInstPrinter.h b/contrib/llvm/lib/Target/X86/InstPrinter/X86ATTInstPrinter.h
index 5426e5c..0293869 100644
--- a/contrib/llvm/lib/Target/X86/InstPrinter/X86ATTInstPrinter.h
+++ b/contrib/llvm/lib/Target/X86/InstPrinter/X86ATTInstPrinter.h
@@ -25,7 +25,7 @@ public:
X86ATTInstPrinter(const MCAsmInfo &MAI);
virtual void printRegName(raw_ostream &OS, unsigned RegNo) const;
- virtual void printInst(const MCInst *MI, raw_ostream &OS);
+ virtual void printInst(const MCInst *MI, raw_ostream &OS, StringRef Annot);
virtual StringRef getOpcodeName(unsigned Opcode) const;
// Autogenerated by tblgen, returns true if we successfully printed an
diff --git a/contrib/llvm/lib/Target/X86/InstPrinter/X86InstComments.cpp b/contrib/llvm/lib/Target/X86/InstPrinter/X86InstComments.cpp
index 4e28dfe..8d85b95 100644
--- a/contrib/llvm/lib/Target/X86/InstPrinter/X86InstComments.cpp
+++ b/contrib/llvm/lib/Target/X86/InstPrinter/X86InstComments.cpp
@@ -14,9 +14,9 @@
#include "X86InstComments.h"
#include "MCTargetDesc/X86MCTargetDesc.h"
+#include "Utils/X86ShuffleDecode.h"
#include "llvm/MC/MCInst.h"
#include "llvm/Support/raw_ostream.h"
-#include "../Utils/X86ShuffleDecode.h"
using namespace llvm;
//===----------------------------------------------------------------------===//
@@ -136,9 +136,11 @@ void llvm::EmitAnyX86InstComments(const MCInst *MI, raw_ostream &OS,
break;
case X86::SHUFPDrri:
+ Src2Name = getRegName(MI->getOperand(2).getReg());
+ // FALL THROUGH.
+ case X86::SHUFPDrmi:
DecodeSHUFPSMask(2, MI->getOperand(3).getImm(), ShuffleMask);
Src1Name = getRegName(MI->getOperand(0).getReg());
- Src2Name = getRegName(MI->getOperand(2).getReg());
break;
case X86::SHUFPSrri:
@@ -205,6 +207,31 @@ void llvm::EmitAnyX86InstComments(const MCInst *MI, raw_ostream &OS,
DecodeUNPCKHPMask(4, ShuffleMask);
Src1Name = getRegName(MI->getOperand(0).getReg());
break;
+ case X86::VPERMILPSri:
+ DecodeVPERMILPSMask(4, MI->getOperand(2).getImm(),
+ ShuffleMask);
+ Src1Name = getRegName(MI->getOperand(0).getReg());
+ break;
+ case X86::VPERMILPSYri:
+ DecodeVPERMILPSMask(8, MI->getOperand(2).getImm(),
+ ShuffleMask);
+ Src1Name = getRegName(MI->getOperand(0).getReg());
+ break;
+ case X86::VPERMILPDri:
+ DecodeVPERMILPDMask(2, MI->getOperand(2).getImm(),
+ ShuffleMask);
+ Src1Name = getRegName(MI->getOperand(0).getReg());
+ break;
+ case X86::VPERMILPDYri:
+ DecodeVPERMILPDMask(4, MI->getOperand(2).getImm(),
+ ShuffleMask);
+ Src1Name = getRegName(MI->getOperand(0).getReg());
+ break;
+ case X86::VPERM2F128rr:
+ DecodeVPERM2F128Mask(MI->getOperand(3).getImm(), ShuffleMask);
+ Src1Name = getRegName(MI->getOperand(1).getReg());
+ Src2Name = getRegName(MI->getOperand(2).getReg());
+ break;
}
diff --git a/contrib/llvm/lib/Target/X86/InstPrinter/X86IntelInstPrinter.cpp b/contrib/llvm/lib/Target/X86/InstPrinter/X86IntelInstPrinter.cpp
index 506e26c..f9ab5ae 100644
--- a/contrib/llvm/lib/Target/X86/InstPrinter/X86IntelInstPrinter.cpp
+++ b/contrib/llvm/lib/Target/X86/InstPrinter/X86IntelInstPrinter.cpp
@@ -32,12 +32,15 @@ void X86IntelInstPrinter::printRegName(raw_ostream &OS, unsigned RegNo) const {
OS << getRegisterName(RegNo);
}
-void X86IntelInstPrinter::printInst(const MCInst *MI, raw_ostream &OS) {
+void X86IntelInstPrinter::printInst(const MCInst *MI, raw_ostream &OS,
+ StringRef Annot) {
printInstruction(MI, OS);
// If verbose assembly is enabled, we can print some informative comments.
- if (CommentStream)
+ if (CommentStream) {
+ printAnnotation(OS, Annot);
EmitAnyX86InstComments(MI, *CommentStream, getRegisterName);
+ }
}
StringRef X86IntelInstPrinter::getOpcodeName(unsigned Opcode) const {
return getInstructionName(Opcode);
diff --git a/contrib/llvm/lib/Target/X86/InstPrinter/X86IntelInstPrinter.h b/contrib/llvm/lib/Target/X86/InstPrinter/X86IntelInstPrinter.h
index e84a194..6d5ec62 100644
--- a/contrib/llvm/lib/Target/X86/InstPrinter/X86IntelInstPrinter.h
+++ b/contrib/llvm/lib/Target/X86/InstPrinter/X86IntelInstPrinter.h
@@ -27,7 +27,7 @@ public:
: MCInstPrinter(MAI) {}
virtual void printRegName(raw_ostream &OS, unsigned RegNo) const;
- virtual void printInst(const MCInst *MI, raw_ostream &OS);
+ virtual void printInst(const MCInst *MI, raw_ostream &OS, StringRef Annot);
virtual StringRef getOpcodeName(unsigned Opcode) const;
// Autogenerated by tblgen.
diff --git a/contrib/llvm/lib/Target/X86/MCTargetDesc/CMakeLists.txt b/contrib/llvm/lib/Target/X86/MCTargetDesc/CMakeLists.txt
deleted file mode 100644
index ca88f8f..0000000
--- a/contrib/llvm/lib/Target/X86/MCTargetDesc/CMakeLists.txt
+++ /dev/null
@@ -1,7 +0,0 @@
-add_llvm_library(LLVMX86Desc
- X86MCTargetDesc.cpp
- X86MCAsmInfo.cpp
- )
-
-# Hack: we need to include 'main' target directory to grab private headers
-include_directories(${CMAKE_CURRENT_SOURCE_DIR}/.. ${CMAKE_CURRENT_BINARY_DIR}/..)
diff --git a/contrib/llvm/lib/Target/X86/MCTargetDesc/Makefile b/contrib/llvm/lib/Target/X86/MCTargetDesc/Makefile
deleted file mode 100644
index b19774e..0000000
--- a/contrib/llvm/lib/Target/X86/MCTargetDesc/Makefile
+++ /dev/null
@@ -1,16 +0,0 @@
-##===- lib/Target/X86/TargetDesc/Makefile ------------------*- Makefile -*-===##
-#
-# The LLVM Compiler Infrastructure
-#
-# This file is distributed under the University of Illinois Open Source
-# License. See LICENSE.TXT for details.
-#
-##===----------------------------------------------------------------------===##
-
-LEVEL = ../../../..
-LIBRARYNAME = LLVMX86Desc
-
-# Hack: we need to include 'main' target directory to grab private headers
-CPP.Flags += -I$(PROJ_OBJ_DIR)/.. -I$(PROJ_SRC_DIR)/..
-
-include $(LEVEL)/Makefile.common
diff --git a/contrib/llvm/lib/Target/X86/X86AsmBackend.cpp b/contrib/llvm/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp
index 9b556a5..69ad7d7 100644
--- a/contrib/llvm/lib/Target/X86/X86AsmBackend.cpp
+++ b/contrib/llvm/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp
@@ -7,9 +7,9 @@
//
//===----------------------------------------------------------------------===//
-#include "llvm/Target/TargetAsmBackend.h"
-#include "X86.h"
-#include "X86FixupKinds.h"
+#include "llvm/MC/MCAsmBackend.h"
+#include "MCTargetDesc/X86BaseInfo.h"
+#include "MCTargetDesc/X86FixupKinds.h"
#include "llvm/ADT/Twine.h"
#include "llvm/MC/MCAssembler.h"
#include "llvm/MC/MCELFObjectWriter.h"
@@ -24,9 +24,8 @@
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/ELF.h"
#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/TargetRegistry.h"
#include "llvm/Support/raw_ostream.h"
-#include "llvm/Target/TargetRegistry.h"
-#include "llvm/Target/TargetAsmBackend.h"
using namespace llvm;
// Option to allow disabling arithmetic relaxation to workaround PR9807, which
@@ -63,10 +62,10 @@ public:
: MCELFObjectTargetWriter(is64Bit, OSType, EMachine, HasRelocationAddend) {}
};
-class X86AsmBackend : public TargetAsmBackend {
+class X86AsmBackend : public MCAsmBackend {
public:
X86AsmBackend(const Target &T)
- : TargetAsmBackend() {}
+ : MCAsmBackend() {}
unsigned getNumFixupKinds() const {
return X86::NumTargetFixupKinds;
@@ -81,7 +80,7 @@ public:
};
if (Kind < FirstTargetFixupKind)
- return TargetAsmBackend::getFixupKindInfo(Kind);
+ return MCAsmBackend::getFixupKindInfo(Kind);
assert(unsigned(Kind - FirstTargetFixupKind) < getNumFixupKinds() &&
"Invalid kind!");
@@ -94,6 +93,14 @@ public:
assert(Fixup.getOffset() + Size <= DataSize &&
"Invalid fixup offset!");
+
+ // Check that uppper bits are either all zeros or all ones.
+ // Specifically ignore overflow/underflow as long as the leakage is
+ // limited to the lower bits. This is to remain compatible with
+ // other assemblers.
+ assert(isIntN(Size * 8 + 1, Value) &&
+ "Value does not fit in the Fixup field");
+
for (unsigned i = 0; i != Size; ++i)
Data[Fixup.getOffset() + i] = uint8_t(Value >> (i * 8));
}
@@ -426,8 +433,7 @@ public:
} // end anonymous namespace
-TargetAsmBackend *llvm::createX86_32AsmBackend(const Target &T,
- const std::string &TT) {
+MCAsmBackend *llvm::createX86_32AsmBackend(const Target &T, StringRef TT) {
Triple TheTriple(TT);
if (TheTriple.isOSDarwin() || TheTriple.getEnvironment() == Triple::MachO)
@@ -439,8 +445,7 @@ TargetAsmBackend *llvm::createX86_32AsmBackend(const Target &T,
return new ELFX86_32AsmBackend(T, TheTriple.getOS());
}
-TargetAsmBackend *llvm::createX86_64AsmBackend(const Target &T,
- const std::string &TT) {
+MCAsmBackend *llvm::createX86_64AsmBackend(const Target &T, StringRef TT) {
Triple TheTriple(TT);
if (TheTriple.isOSDarwin() || TheTriple.getEnvironment() == Triple::MachO)
diff --git a/contrib/llvm/lib/Target/X86/MCTargetDesc/X86BaseInfo.h b/contrib/llvm/lib/Target/X86/MCTargetDesc/X86BaseInfo.h
new file mode 100644
index 0000000..e6ba705
--- /dev/null
+++ b/contrib/llvm/lib/Target/X86/MCTargetDesc/X86BaseInfo.h
@@ -0,0 +1,548 @@
+//===-- X86BaseInfo.h - Top level definitions for X86 -------- --*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains small standalone helper functions and enum definitions for
+// the X86 target useful for the compiler back-end and the MC libraries.
+// As such, it deliberately does not include references to LLVM core
+// code gen types, passes, etc..
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef X86BASEINFO_H
+#define X86BASEINFO_H
+
+#include "X86MCTargetDesc.h"
+#include "llvm/Support/DataTypes.h"
+#include <cassert>
+
+namespace llvm {
+
+namespace X86 {
+ // Enums for memory operand decoding. Each memory operand is represented with
+ // a 5 operand sequence in the form:
+ // [BaseReg, ScaleAmt, IndexReg, Disp, Segment]
+ // These enums help decode this.
+ enum {
+ AddrBaseReg = 0,
+ AddrScaleAmt = 1,
+ AddrIndexReg = 2,
+ AddrDisp = 3,
+
+ /// AddrSegmentReg - The operand # of the segment in the memory operand.
+ AddrSegmentReg = 4,
+
+ /// AddrNumOperands - Total number of operands in a memory reference.
+ AddrNumOperands = 5
+ };
+} // end namespace X86;
+
+
+/// X86II - This namespace holds all of the target specific flags that
+/// instruction info tracks.
+///
+namespace X86II {
+ /// Target Operand Flag enum.
+ enum TOF {
+ //===------------------------------------------------------------------===//
+ // X86 Specific MachineOperand flags.
+
+ MO_NO_FLAG,
+
+ /// MO_GOT_ABSOLUTE_ADDRESS - On a symbol operand, this represents a
+ /// relocation of:
+ /// SYMBOL_LABEL + [. - PICBASELABEL]
+ MO_GOT_ABSOLUTE_ADDRESS,
+
+ /// MO_PIC_BASE_OFFSET - On a symbol operand this indicates that the
+ /// immediate should get the value of the symbol minus the PIC base label:
+ /// SYMBOL_LABEL - PICBASELABEL
+ MO_PIC_BASE_OFFSET,
+
+ /// MO_GOT - On a symbol operand this indicates that the immediate is the
+ /// offset to the GOT entry for the symbol name from the base of the GOT.
+ ///
+ /// See the X86-64 ELF ABI supplement for more details.
+ /// SYMBOL_LABEL @GOT
+ MO_GOT,
+
+ /// MO_GOTOFF - On a symbol operand this indicates that the immediate is
+ /// the offset to the location of the symbol name from the base of the GOT.
+ ///
+ /// See the X86-64 ELF ABI supplement for more details.
+ /// SYMBOL_LABEL @GOTOFF
+ MO_GOTOFF,
+
+ /// MO_GOTPCREL - On a symbol operand this indicates that the immediate is
+ /// offset to the GOT entry for the symbol name from the current code
+ /// location.
+ ///
+ /// See the X86-64 ELF ABI supplement for more details.
+ /// SYMBOL_LABEL @GOTPCREL
+ MO_GOTPCREL,
+
+ /// MO_PLT - On a symbol operand this indicates that the immediate is
+ /// offset to the PLT entry of symbol name from the current code location.
+ ///
+ /// See the X86-64 ELF ABI supplement for more details.
+ /// SYMBOL_LABEL @PLT
+ MO_PLT,
+
+ /// MO_TLSGD - On a symbol operand this indicates that the immediate is
+ /// some TLS offset.
+ ///
+ /// See 'ELF Handling for Thread-Local Storage' for more details.
+ /// SYMBOL_LABEL @TLSGD
+ MO_TLSGD,
+
+ /// MO_GOTTPOFF - On a symbol operand this indicates that the immediate is
+ /// some TLS offset.
+ ///
+ /// See 'ELF Handling for Thread-Local Storage' for more details.
+ /// SYMBOL_LABEL @GOTTPOFF
+ MO_GOTTPOFF,
+
+ /// MO_INDNTPOFF - On a symbol operand this indicates that the immediate is
+ /// some TLS offset.
+ ///
+ /// See 'ELF Handling for Thread-Local Storage' for more details.
+ /// SYMBOL_LABEL @INDNTPOFF
+ MO_INDNTPOFF,
+
+ /// MO_TPOFF - On a symbol operand this indicates that the immediate is
+ /// some TLS offset.
+ ///
+ /// See 'ELF Handling for Thread-Local Storage' for more details.
+ /// SYMBOL_LABEL @TPOFF
+ MO_TPOFF,
+
+ /// MO_NTPOFF - On a symbol operand this indicates that the immediate is
+ /// some TLS offset.
+ ///
+ /// See 'ELF Handling for Thread-Local Storage' for more details.
+ /// SYMBOL_LABEL @NTPOFF
+ MO_NTPOFF,
+
+ /// MO_DLLIMPORT - On a symbol operand "FOO", this indicates that the
+ /// reference is actually to the "__imp_FOO" symbol. This is used for
+ /// dllimport linkage on windows.
+ MO_DLLIMPORT,
+
+ /// MO_DARWIN_STUB - On a symbol operand "FOO", this indicates that the
+ /// reference is actually to the "FOO$stub" symbol. This is used for calls
+ /// and jumps to external functions on Tiger and earlier.
+ MO_DARWIN_STUB,
+
+ /// MO_DARWIN_NONLAZY - On a symbol operand "FOO", this indicates that the
+ /// reference is actually to the "FOO$non_lazy_ptr" symbol, which is a
+ /// non-PIC-base-relative reference to a non-hidden dyld lazy pointer stub.
+ MO_DARWIN_NONLAZY,
+
+ /// MO_DARWIN_NONLAZY_PIC_BASE - On a symbol operand "FOO", this indicates
+ /// that the reference is actually to "FOO$non_lazy_ptr - PICBASE", which is
+ /// a PIC-base-relative reference to a non-hidden dyld lazy pointer stub.
+ MO_DARWIN_NONLAZY_PIC_BASE,
+
+ /// MO_DARWIN_HIDDEN_NONLAZY_PIC_BASE - On a symbol operand "FOO", this
+ /// indicates that the reference is actually to "FOO$non_lazy_ptr -PICBASE",
+ /// which is a PIC-base-relative reference to a hidden dyld lazy pointer
+ /// stub.
+ MO_DARWIN_HIDDEN_NONLAZY_PIC_BASE,
+
+ /// MO_TLVP - On a symbol operand this indicates that the immediate is
+ /// some TLS offset.
+ ///
+ /// This is the TLS offset for the Darwin TLS mechanism.
+ MO_TLVP,
+
+ /// MO_TLVP_PIC_BASE - On a symbol operand this indicates that the immediate
+ /// is some TLS offset from the picbase.
+ ///
+ /// This is the 32-bit TLS offset for Darwin TLS in PIC mode.
+ MO_TLVP_PIC_BASE
+ };
+
+ enum {
+ //===------------------------------------------------------------------===//
+ // Instruction encodings. These are the standard/most common forms for X86
+ // instructions.
+ //
+
+ // PseudoFrm - This represents an instruction that is a pseudo instruction
+ // or one that has not been implemented yet. It is illegal to code generate
+ // it, but tolerated for intermediate implementation stages.
+ Pseudo = 0,
+
+ /// Raw - This form is for instructions that don't have any operands, so
+ /// they are just a fixed opcode value, like 'leave'.
+ RawFrm = 1,
+
+ /// AddRegFrm - This form is used for instructions like 'push r32' that have
+ /// their one register operand added to their opcode.
+ AddRegFrm = 2,
+
+ /// MRMDestReg - This form is used for instructions that use the Mod/RM byte
+ /// to specify a destination, which in this case is a register.
+ ///
+ MRMDestReg = 3,
+
+ /// MRMDestMem - This form is used for instructions that use the Mod/RM byte
+ /// to specify a destination, which in this case is memory.
+ ///
+ MRMDestMem = 4,
+
+ /// MRMSrcReg - This form is used for instructions that use the Mod/RM byte
+ /// to specify a source, which in this case is a register.
+ ///
+ MRMSrcReg = 5,
+
+ /// MRMSrcMem - This form is used for instructions that use the Mod/RM byte
+ /// to specify a source, which in this case is memory.
+ ///
+ MRMSrcMem = 6,
+
+ /// MRM[0-7][rm] - These forms are used to represent instructions that use
+ /// a Mod/RM byte, and use the middle field to hold extended opcode
+ /// information. In the intel manual these are represented as /0, /1, ...
+ ///
+
+ // First, instructions that operate on a register r/m operand...
+ MRM0r = 16, MRM1r = 17, MRM2r = 18, MRM3r = 19, // Format /0 /1 /2 /3
+ MRM4r = 20, MRM5r = 21, MRM6r = 22, MRM7r = 23, // Format /4 /5 /6 /7
+
+ // Next, instructions that operate on a memory r/m operand...
+ MRM0m = 24, MRM1m = 25, MRM2m = 26, MRM3m = 27, // Format /0 /1 /2 /3
+ MRM4m = 28, MRM5m = 29, MRM6m = 30, MRM7m = 31, // Format /4 /5 /6 /7
+
+ // MRMInitReg - This form is used for instructions whose source and
+ // destinations are the same register.
+ MRMInitReg = 32,
+
+ //// MRM_C1 - A mod/rm byte of exactly 0xC1.
+ MRM_C1 = 33,
+ MRM_C2 = 34,
+ MRM_C3 = 35,
+ MRM_C4 = 36,
+ MRM_C8 = 37,
+ MRM_C9 = 38,
+ MRM_E8 = 39,
+ MRM_F0 = 40,
+ MRM_F8 = 41,
+ MRM_F9 = 42,
+ MRM_D0 = 45,
+ MRM_D1 = 46,
+
+ /// RawFrmImm8 - This is used for the ENTER instruction, which has two
+ /// immediates, the first of which is a 16-bit immediate (specified by
+ /// the imm encoding) and the second is a 8-bit fixed value.
+ RawFrmImm8 = 43,
+
+ /// RawFrmImm16 - This is used for CALL FAR instructions, which have two
+ /// immediates, the first of which is a 16 or 32-bit immediate (specified by
+ /// the imm encoding) and the second is a 16-bit fixed value. In the AMD
+ /// manual, this operand is described as pntr16:32 and pntr16:16
+ RawFrmImm16 = 44,
+
+ FormMask = 63,
+
+ //===------------------------------------------------------------------===//
+ // Actual flags...
+
+ // OpSize - Set if this instruction requires an operand size prefix (0x66),
+ // which most often indicates that the instruction operates on 16 bit data
+ // instead of 32 bit data.
+ OpSize = 1 << 6,
+
+ // AsSize - Set if this instruction requires an operand size prefix (0x67),
+ // which most often indicates that the instruction address 16 bit address
+ // instead of 32 bit address (or 32 bit address in 64 bit mode).
+ AdSize = 1 << 7,
+
+ //===------------------------------------------------------------------===//
+ // Op0Mask - There are several prefix bytes that are used to form two byte
+ // opcodes. These are currently 0x0F, 0xF3, and 0xD8-0xDF. This mask is
+ // used to obtain the setting of this field. If no bits in this field is
+ // set, there is no prefix byte for obtaining a multibyte opcode.
+ //
+ Op0Shift = 8,
+ Op0Mask = 0x1F << Op0Shift,
+
+ // TB - TwoByte - Set if this instruction has a two byte opcode, which
+ // starts with a 0x0F byte before the real opcode.
+ TB = 1 << Op0Shift,
+
+ // REP - The 0xF3 prefix byte indicating repetition of the following
+ // instruction.
+ REP = 2 << Op0Shift,
+
+ // D8-DF - These escape opcodes are used by the floating point unit. These
+ // values must remain sequential.
+ D8 = 3 << Op0Shift, D9 = 4 << Op0Shift,
+ DA = 5 << Op0Shift, DB = 6 << Op0Shift,
+ DC = 7 << Op0Shift, DD = 8 << Op0Shift,
+ DE = 9 << Op0Shift, DF = 10 << Op0Shift,
+
+ // XS, XD - These prefix codes are for single and double precision scalar
+ // floating point operations performed in the SSE registers.
+ XD = 11 << Op0Shift, XS = 12 << Op0Shift,
+
+ // T8, TA, A6, A7 - Prefix after the 0x0F prefix.
+ T8 = 13 << Op0Shift, TA = 14 << Op0Shift,
+ A6 = 15 << Op0Shift, A7 = 16 << Op0Shift,
+
+ // TF - Prefix before and after 0x0F
+ TF = 17 << Op0Shift,
+
+ //===------------------------------------------------------------------===//
+ // REX_W - REX prefixes are instruction prefixes used in 64-bit mode.
+ // They are used to specify GPRs and SSE registers, 64-bit operand size,
+ // etc. We only cares about REX.W and REX.R bits and only the former is
+ // statically determined.
+ //
+ REXShift = Op0Shift + 5,
+ REX_W = 1 << REXShift,
+
+ //===------------------------------------------------------------------===//
+ // This three-bit field describes the size of an immediate operand. Zero is
+ // unused so that we can tell if we forgot to set a value.
+ ImmShift = REXShift + 1,
+ ImmMask = 7 << ImmShift,
+ Imm8 = 1 << ImmShift,
+ Imm8PCRel = 2 << ImmShift,
+ Imm16 = 3 << ImmShift,
+ Imm16PCRel = 4 << ImmShift,
+ Imm32 = 5 << ImmShift,
+ Imm32PCRel = 6 << ImmShift,
+ Imm64 = 7 << ImmShift,
+
+ //===------------------------------------------------------------------===//
+ // FP Instruction Classification... Zero is non-fp instruction.
+
+ // FPTypeMask - Mask for all of the FP types...
+ FPTypeShift = ImmShift + 3,
+ FPTypeMask = 7 << FPTypeShift,
+
+ // NotFP - The default, set for instructions that do not use FP registers.
+ NotFP = 0 << FPTypeShift,
+
+ // ZeroArgFP - 0 arg FP instruction which implicitly pushes ST(0), f.e. fld0
+ ZeroArgFP = 1 << FPTypeShift,
+
+ // OneArgFP - 1 arg FP instructions which implicitly read ST(0), such as fst
+ OneArgFP = 2 << FPTypeShift,
+
+ // OneArgFPRW - 1 arg FP instruction which implicitly read ST(0) and write a
+ // result back to ST(0). For example, fcos, fsqrt, etc.
+ //
+ OneArgFPRW = 3 << FPTypeShift,
+
+ // TwoArgFP - 2 arg FP instructions which implicitly read ST(0), and an
+ // explicit argument, storing the result to either ST(0) or the implicit
+ // argument. For example: fadd, fsub, fmul, etc...
+ TwoArgFP = 4 << FPTypeShift,
+
+ // CompareFP - 2 arg FP instructions which implicitly read ST(0) and an
+ // explicit argument, but have no destination. Example: fucom, fucomi, ...
+ CompareFP = 5 << FPTypeShift,
+
+ // CondMovFP - "2 operand" floating point conditional move instructions.
+ CondMovFP = 6 << FPTypeShift,
+
+ // SpecialFP - Special instruction forms. Dispatch by opcode explicitly.
+ SpecialFP = 7 << FPTypeShift,
+
+ // Lock prefix
+ LOCKShift = FPTypeShift + 3,
+ LOCK = 1 << LOCKShift,
+
+ // Segment override prefixes. Currently we just need ability to address
+ // stuff in gs and fs segments.
+ SegOvrShift = LOCKShift + 1,
+ SegOvrMask = 3 << SegOvrShift,
+ FS = 1 << SegOvrShift,
+ GS = 2 << SegOvrShift,
+
+ // Execution domain for SSE instructions in bits 23, 24.
+ // 0 in bits 23-24 means normal, non-SSE instruction.
+ SSEDomainShift = SegOvrShift + 2,
+
+ OpcodeShift = SSEDomainShift + 2,
+
+ //===------------------------------------------------------------------===//
+ /// VEX - The opcode prefix used by AVX instructions
+ VEXShift = OpcodeShift + 8,
+ VEX = 1U << 0,
+
+ /// VEX_W - Has a opcode specific functionality, but is used in the same
+ /// way as REX_W is for regular SSE instructions.
+ VEX_W = 1U << 1,
+
+ /// VEX_4V - Used to specify an additional AVX/SSE register. Several 2
+ /// address instructions in SSE are represented as 3 address ones in AVX
+ /// and the additional register is encoded in VEX_VVVV prefix.
+ VEX_4V = 1U << 2,
+
+ /// VEX_I8IMM - Specifies that the last register used in a AVX instruction,
+ /// must be encoded in the i8 immediate field. This usually happens in
+ /// instructions with 4 operands.
+ VEX_I8IMM = 1U << 3,
+
+ /// VEX_L - Stands for a bit in the VEX opcode prefix meaning the current
+ /// instruction uses 256-bit wide registers. This is usually auto detected
+ /// if a VR256 register is used, but some AVX instructions also have this
+ /// field marked when using a f256 memory references.
+ VEX_L = 1U << 4,
+
+ // VEX_LIG - Specifies that this instruction ignores the L-bit in the VEX
+ // prefix. Usually used for scalar instructions. Needed by disassembler.
+ VEX_LIG = 1U << 5,
+
+ /// Has3DNow0F0FOpcode - This flag indicates that the instruction uses the
+ /// wacky 0x0F 0x0F prefix for 3DNow! instructions. The manual documents
+ /// this as having a 0x0F prefix with a 0x0F opcode, and each instruction
+ /// storing a classifier in the imm8 field. To simplify our implementation,
+ /// we handle this by storeing the classifier in the opcode field and using
+ /// this flag to indicate that the encoder should do the wacky 3DNow! thing.
+ Has3DNow0F0FOpcode = 1U << 6
+ };
+
+ // getBaseOpcodeFor - This function returns the "base" X86 opcode for the
+ // specified machine instruction.
+ //
+ static inline unsigned char getBaseOpcodeFor(uint64_t TSFlags) {
+ return TSFlags >> X86II::OpcodeShift;
+ }
+
+ static inline bool hasImm(uint64_t TSFlags) {
+ return (TSFlags & X86II::ImmMask) != 0;
+ }
+
+ /// getSizeOfImm - Decode the "size of immediate" field from the TSFlags field
+ /// of the specified instruction.
+ static inline unsigned getSizeOfImm(uint64_t TSFlags) {
+ switch (TSFlags & X86II::ImmMask) {
+ default: assert(0 && "Unknown immediate size");
+ case X86II::Imm8:
+ case X86II::Imm8PCRel: return 1;
+ case X86II::Imm16:
+ case X86II::Imm16PCRel: return 2;
+ case X86II::Imm32:
+ case X86II::Imm32PCRel: return 4;
+ case X86II::Imm64: return 8;
+ }
+ }
+
+ /// isImmPCRel - Return true if the immediate of the specified instruction's
+ /// TSFlags indicates that it is pc relative.
+ static inline unsigned isImmPCRel(uint64_t TSFlags) {
+ switch (TSFlags & X86II::ImmMask) {
+ default: assert(0 && "Unknown immediate size");
+ case X86II::Imm8PCRel:
+ case X86II::Imm16PCRel:
+ case X86II::Imm32PCRel:
+ return true;
+ case X86II::Imm8:
+ case X86II::Imm16:
+ case X86II::Imm32:
+ case X86II::Imm64:
+ return false;
+ }
+ }
+
+ /// getMemoryOperandNo - The function returns the MCInst operand # for the
+ /// first field of the memory operand. If the instruction doesn't have a
+ /// memory operand, this returns -1.
+ ///
+ /// Note that this ignores tied operands. If there is a tied register which
+ /// is duplicated in the MCInst (e.g. "EAX = addl EAX, [mem]") it is only
+ /// counted as one operand.
+ ///
+ static inline int getMemoryOperandNo(uint64_t TSFlags) {
+ switch (TSFlags & X86II::FormMask) {
+ case X86II::MRMInitReg: assert(0 && "FIXME: Remove this form");
+ default: assert(0 && "Unknown FormMask value in getMemoryOperandNo!");
+ case X86II::Pseudo:
+ case X86II::RawFrm:
+ case X86II::AddRegFrm:
+ case X86II::MRMDestReg:
+ case X86II::MRMSrcReg:
+ case X86II::RawFrmImm8:
+ case X86II::RawFrmImm16:
+ return -1;
+ case X86II::MRMDestMem:
+ return 0;
+ case X86II::MRMSrcMem: {
+ bool HasVEX_4V = (TSFlags >> X86II::VEXShift) & X86II::VEX_4V;
+ unsigned FirstMemOp = 1;
+ if (HasVEX_4V)
+ ++FirstMemOp;// Skip the register source (which is encoded in VEX_VVVV).
+
+ // FIXME: Maybe lea should have its own form? This is a horrible hack.
+ //if (Opcode == X86::LEA64r || Opcode == X86::LEA64_32r ||
+ // Opcode == X86::LEA16r || Opcode == X86::LEA32r)
+ return FirstMemOp;
+ }
+ case X86II::MRM0r: case X86II::MRM1r:
+ case X86II::MRM2r: case X86II::MRM3r:
+ case X86II::MRM4r: case X86II::MRM5r:
+ case X86II::MRM6r: case X86II::MRM7r:
+ return -1;
+ case X86II::MRM0m: case X86II::MRM1m:
+ case X86II::MRM2m: case X86II::MRM3m:
+ case X86II::MRM4m: case X86II::MRM5m:
+ case X86II::MRM6m: case X86II::MRM7m:
+ return 0;
+ case X86II::MRM_C1:
+ case X86II::MRM_C2:
+ case X86II::MRM_C3:
+ case X86II::MRM_C4:
+ case X86II::MRM_C8:
+ case X86II::MRM_C9:
+ case X86II::MRM_E8:
+ case X86II::MRM_F0:
+ case X86II::MRM_F8:
+ case X86II::MRM_F9:
+ case X86II::MRM_D0:
+ case X86II::MRM_D1:
+ return -1;
+ }
+ }
+
+ /// isX86_64ExtendedReg - Is the MachineOperand a x86-64 extended (r8 or
+ /// higher) register? e.g. r8, xmm8, xmm13, etc.
+ static inline bool isX86_64ExtendedReg(unsigned RegNo) {
+ switch (RegNo) {
+ default: break;
+ case X86::R8: case X86::R9: case X86::R10: case X86::R11:
+ case X86::R12: case X86::R13: case X86::R14: case X86::R15:
+ case X86::R8D: case X86::R9D: case X86::R10D: case X86::R11D:
+ case X86::R12D: case X86::R13D: case X86::R14D: case X86::R15D:
+ case X86::R8W: case X86::R9W: case X86::R10W: case X86::R11W:
+ case X86::R12W: case X86::R13W: case X86::R14W: case X86::R15W:
+ case X86::R8B: case X86::R9B: case X86::R10B: case X86::R11B:
+ case X86::R12B: case X86::R13B: case X86::R14B: case X86::R15B:
+ case X86::XMM8: case X86::XMM9: case X86::XMM10: case X86::XMM11:
+ case X86::XMM12: case X86::XMM13: case X86::XMM14: case X86::XMM15:
+ case X86::YMM8: case X86::YMM9: case X86::YMM10: case X86::YMM11:
+ case X86::YMM12: case X86::YMM13: case X86::YMM14: case X86::YMM15:
+ case X86::CR8: case X86::CR9: case X86::CR10: case X86::CR11:
+ case X86::CR12: case X86::CR13: case X86::CR14: case X86::CR15:
+ return true;
+ }
+ return false;
+ }
+
+ static inline bool isX86_64NonExtLowByteReg(unsigned reg) {
+ return (reg == X86::SPL || reg == X86::BPL ||
+ reg == X86::SIL || reg == X86::DIL);
+ }
+}
+
+} // end namespace llvm;
+
+#endif
diff --git a/contrib/llvm/lib/Target/X86/X86FixupKinds.h b/contrib/llvm/lib/Target/X86/MCTargetDesc/X86FixupKinds.h
index 17d242a..17d242a 100644
--- a/contrib/llvm/lib/Target/X86/X86FixupKinds.h
+++ b/contrib/llvm/lib/Target/X86/MCTargetDesc/X86FixupKinds.h
diff --git a/contrib/llvm/lib/Target/X86/X86MCCodeEmitter.cpp b/contrib/llvm/lib/Target/X86/MCTargetDesc/X86MCCodeEmitter.cpp
index ce8ef49..2eee112 100644
--- a/contrib/llvm/lib/Target/X86/X86MCCodeEmitter.cpp
+++ b/contrib/llvm/lib/Target/X86/MCTargetDesc/X86MCCodeEmitter.cpp
@@ -12,12 +12,14 @@
//===----------------------------------------------------------------------===//
#define DEBUG_TYPE "mccodeemitter"
-#include "X86.h"
-#include "X86InstrInfo.h"
-#include "X86FixupKinds.h"
+#include "MCTargetDesc/X86MCTargetDesc.h"
+#include "MCTargetDesc/X86BaseInfo.h"
+#include "MCTargetDesc/X86FixupKinds.h"
#include "llvm/MC/MCCodeEmitter.h"
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCInst.h"
+#include "llvm/MC/MCInstrInfo.h"
+#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/MC/MCSubtargetInfo.h"
#include "llvm/MC/MCSymbol.h"
#include "llvm/Support/raw_ostream.h"
@@ -45,7 +47,7 @@ public:
}
static unsigned GetX86RegNum(const MCOperand &MO) {
- return X86RegisterInfo::getX86RegNum(MO.getReg());
+ return X86_MC::getX86RegNum(MO.getReg());
}
// On regular x86, both XMM0-XMM7 and XMM8-XMM15 are encoded in the range
@@ -159,9 +161,11 @@ static MCFixupKind getImmFixupKind(uint64_t TSFlags) {
static bool Is32BitMemOperand(const MCInst &MI, unsigned Op) {
const MCOperand &BaseReg = MI.getOperand(Op+X86::AddrBaseReg);
const MCOperand &IndexReg = MI.getOperand(Op+X86::AddrIndexReg);
-
- if ((BaseReg.getReg() != 0 && X86::GR32RegClass.contains(BaseReg.getReg())) ||
- (IndexReg.getReg() != 0 && X86::GR32RegClass.contains(IndexReg.getReg())))
+
+ if ((BaseReg.getReg() != 0 &&
+ X86MCRegisterClasses[X86::GR32RegClassID].contains(BaseReg.getReg())) ||
+ (IndexReg.getReg() != 0 &&
+ X86MCRegisterClasses[X86::GR32RegClassID].contains(IndexReg.getReg())))
return true;
return false;
}
@@ -191,11 +195,11 @@ EmitImmediate(const MCOperand &DispOp, unsigned Size, MCFixupKind FixupKind,
SmallVectorImpl<MCFixup> &Fixups, int ImmOffset) const {
const MCExpr *Expr = NULL;
if (DispOp.isImm()) {
- // If this is a simple integer displacement that doesn't require a relocation,
- // emit it now.
+ // If this is a simple integer displacement that doesn't require a
+ // relocation, emit it now.
if (FixupKind != FK_PCRel_1 &&
- FixupKind != FK_PCRel_2 &&
- FixupKind != FK_PCRel_4) {
+ FixupKind != FK_PCRel_2 &&
+ FixupKind != FK_PCRel_4) {
EmitConstant(DispOp.getImm()+ImmOffset, Size, CurByte, OS);
return;
}
@@ -205,7 +209,9 @@ EmitImmediate(const MCOperand &DispOp, unsigned Size, MCFixupKind FixupKind,
}
// If we have an immoffset, add it to the expression.
- if (FixupKind == FK_Data_4 && StartsWithGlobalOffsetTable(Expr)) {
+ if ((FixupKind == FK_Data_4 ||
+ FixupKind == MCFixupKind(X86::reloc_signed_4byte)) &&
+ StartsWithGlobalOffsetTable(Expr)) {
assert(ImmOffset == 0);
FixupKind = MCFixupKind(X86::reloc_global_offset_table);
@@ -346,7 +352,7 @@ void X86MCCodeEmitter::EmitMemModRMByte(const MCInst &MI, unsigned Op,
}
// Calculate what the SS field value should be...
- static const unsigned SSTable[] = { ~0, 0, 1, ~0, 2, ~0, ~0, ~0, 3 };
+ static const unsigned SSTable[] = { ~0U, 0, 1, ~0U, 2, ~0U, ~0U, ~0U, 3 };
unsigned SS = SSTable[Scale.getImm()];
if (BaseReg == 0) {
@@ -486,71 +492,100 @@ void X86MCCodeEmitter::EmitVEXOpcodePrefix(uint64_t TSFlags, unsigned &CurByte,
VEX_L = 1;
}
- unsigned NumOps = MI.getNumOperands();
+ // Classify VEX_B, VEX_4V, VEX_R, VEX_X
unsigned CurOp = 0;
- bool IsDestMem = false;
-
switch (TSFlags & X86II::FormMask) {
case X86II::MRMInitReg: assert(0 && "FIXME: Remove this!");
- case X86II::MRMDestMem:
- IsDestMem = true;
- // The important info for the VEX prefix is never beyond the address
- // registers. Don't check beyond that.
- NumOps = CurOp = X86::AddrNumOperands;
+ case X86II::MRMDestMem: {
+ // MRMDestMem instructions forms:
+ // MemAddr, src1(ModR/M)
+ // MemAddr, src1(VEX_4V), src2(ModR/M)
+ // MemAddr, src1(ModR/M), imm8
+ //
+ if (X86II::isX86_64ExtendedReg(MI.getOperand(X86::AddrBaseReg).getReg()))
+ VEX_B = 0x0;
+ if (X86II::isX86_64ExtendedReg(MI.getOperand(X86::AddrIndexReg).getReg()))
+ VEX_X = 0x0;
+
+ CurOp = X86::AddrNumOperands;
+ if (HasVEX_4V)
+ VEX_4V = getVEXRegisterEncoding(MI, CurOp++);
+
+ const MCOperand &MO = MI.getOperand(CurOp);
+ if (MO.isReg() && X86II::isX86_64ExtendedReg(MO.getReg()))
+ VEX_R = 0x0;
+ break;
+ }
+ case X86II::MRMSrcMem: {
+ // MRMSrcMem instructions forms:
+ // src1(ModR/M), MemAddr
+ // src1(ModR/M), src2(VEX_4V), MemAddr
+ // src1(ModR/M), MemAddr, imm8
+ // src1(ModR/M), MemAddr, src2(VEX_I8IMM)
+ //
+ if (X86II::isX86_64ExtendedReg(MI.getOperand(0).getReg()))
+ VEX_R = 0x0;
+
+ unsigned MemAddrOffset = 1;
+ if (HasVEX_4V) {
+ VEX_4V = getVEXRegisterEncoding(MI, 1);
+ MemAddrOffset++;
+ }
+
+ if (X86II::isX86_64ExtendedReg(
+ MI.getOperand(MemAddrOffset+X86::AddrBaseReg).getReg()))
+ VEX_B = 0x0;
+ if (X86II::isX86_64ExtendedReg(
+ MI.getOperand(MemAddrOffset+X86::AddrIndexReg).getReg()))
+ VEX_X = 0x0;
+ break;
+ }
case X86II::MRM0m: case X86II::MRM1m:
case X86II::MRM2m: case X86II::MRM3m:
case X86II::MRM4m: case X86II::MRM5m:
case X86II::MRM6m: case X86II::MRM7m:
- case X86II::MRMSrcMem:
+ // MRM[0-9]m instructions forms:
+ // MemAddr
+ if (X86II::isX86_64ExtendedReg(MI.getOperand(X86::AddrBaseReg).getReg()))
+ VEX_B = 0x0;
+ if (X86II::isX86_64ExtendedReg(MI.getOperand(X86::AddrIndexReg).getReg()))
+ VEX_X = 0x0;
+ break;
case X86II::MRMSrcReg:
- if (MI.getNumOperands() > CurOp && MI.getOperand(CurOp).isReg() &&
- X86InstrInfo::isX86_64ExtendedReg(MI.getOperand(CurOp).getReg()))
+ // MRMSrcReg instructions forms:
+ // dst(ModR/M), src1(VEX_4V), src2(ModR/M), src3(VEX_I8IMM)
+ // dst(ModR/M), src1(ModR/M)
+ // dst(ModR/M), src1(ModR/M), imm8
+ //
+ if (X86II::isX86_64ExtendedReg(MI.getOperand(CurOp).getReg()))
VEX_R = 0x0;
CurOp++;
- if (HasVEX_4V) {
- VEX_4V = getVEXRegisterEncoding(MI, IsDestMem ? CurOp-1 : CurOp);
- CurOp++;
- }
-
- // To only check operands before the memory address ones, start
- // the search from the beginning
- if (IsDestMem)
- CurOp = 0;
-
- // If the last register should be encoded in the immediate field
- // do not use any bit from VEX prefix to this register, ignore it
- if ((TSFlags >> X86II::VEXShift) & X86II::VEX_I8IMM)
- NumOps--;
-
- for (; CurOp != NumOps; ++CurOp) {
- const MCOperand &MO = MI.getOperand(CurOp);
- if (MO.isReg() && X86InstrInfo::isX86_64ExtendedReg(MO.getReg()))
- VEX_B = 0x0;
- if (!VEX_B && MO.isReg() &&
- ((TSFlags & X86II::FormMask) == X86II::MRMSrcMem) &&
- X86InstrInfo::isX86_64ExtendedReg(MO.getReg()))
- VEX_X = 0x0;
- }
- break;
- default: // MRMDestReg, MRM0r-MRM7r, RawFrm
- if (!MI.getNumOperands())
- break;
-
- if (MI.getOperand(CurOp).isReg() &&
- X86InstrInfo::isX86_64ExtendedReg(MI.getOperand(CurOp).getReg()))
- VEX_B = 0;
-
if (HasVEX_4V)
- VEX_4V = getVEXRegisterEncoding(MI, CurOp);
-
- CurOp++;
- for (; CurOp != NumOps; ++CurOp) {
- const MCOperand &MO = MI.getOperand(CurOp);
- if (MO.isReg() && !HasVEX_4V &&
- X86InstrInfo::isX86_64ExtendedReg(MO.getReg()))
- VEX_R = 0x0;
- }
+ VEX_4V = getVEXRegisterEncoding(MI, CurOp++);
+ if (X86II::isX86_64ExtendedReg(MI.getOperand(CurOp).getReg()))
+ VEX_B = 0x0;
+ break;
+ case X86II::MRMDestReg:
+ // MRMDestReg instructions forms:
+ // dst(ModR/M), src(ModR/M)
+ // dst(ModR/M), src(ModR/M), imm8
+ if (X86II::isX86_64ExtendedReg(MI.getOperand(0).getReg()))
+ VEX_B = 0x0;
+ if (X86II::isX86_64ExtendedReg(MI.getOperand(1).getReg()))
+ VEX_R = 0x0;
+ break;
+ case X86II::MRM0r: case X86II::MRM1r:
+ case X86II::MRM2r: case X86II::MRM3r:
+ case X86II::MRM4r: case X86II::MRM5r:
+ case X86II::MRM6r: case X86II::MRM7r:
+ // MRM0r-MRM7r instructions forms:
+ // dst(VEX_4V), src(ModR/M), imm8
+ VEX_4V = getVEXRegisterEncoding(MI, 0);
+ if (X86II::isX86_64ExtendedReg(MI.getOperand(1).getReg()))
+ VEX_B = 0x0;
+ break;
+ default: // RawFrm
break;
}
@@ -604,7 +639,7 @@ static unsigned DetermineREXPrefix(const MCInst &MI, uint64_t TSFlags,
const MCOperand &MO = MI.getOperand(i);
if (!MO.isReg()) continue;
unsigned Reg = MO.getReg();
- if (!X86InstrInfo::isX86_64NonExtLowByteReg(Reg)) continue;
+ if (!X86II::isX86_64NonExtLowByteReg(Reg)) continue;
// FIXME: The caller of DetermineREXPrefix slaps this prefix onto anything
// that returns non-zero.
REX |= 0x40; // REX fixed encoding prefix
@@ -615,25 +650,25 @@ static unsigned DetermineREXPrefix(const MCInst &MI, uint64_t TSFlags,
case X86II::MRMInitReg: assert(0 && "FIXME: Remove this!");
case X86II::MRMSrcReg:
if (MI.getOperand(0).isReg() &&
- X86InstrInfo::isX86_64ExtendedReg(MI.getOperand(0).getReg()))
+ X86II::isX86_64ExtendedReg(MI.getOperand(0).getReg()))
REX |= 1 << 2; // set REX.R
i = isTwoAddr ? 2 : 1;
for (; i != NumOps; ++i) {
const MCOperand &MO = MI.getOperand(i);
- if (MO.isReg() && X86InstrInfo::isX86_64ExtendedReg(MO.getReg()))
+ if (MO.isReg() && X86II::isX86_64ExtendedReg(MO.getReg()))
REX |= 1 << 0; // set REX.B
}
break;
case X86II::MRMSrcMem: {
if (MI.getOperand(0).isReg() &&
- X86InstrInfo::isX86_64ExtendedReg(MI.getOperand(0).getReg()))
+ X86II::isX86_64ExtendedReg(MI.getOperand(0).getReg()))
REX |= 1 << 2; // set REX.R
unsigned Bit = 0;
i = isTwoAddr ? 2 : 1;
for (; i != NumOps; ++i) {
const MCOperand &MO = MI.getOperand(i);
if (MO.isReg()) {
- if (X86InstrInfo::isX86_64ExtendedReg(MO.getReg()))
+ if (X86II::isX86_64ExtendedReg(MO.getReg()))
REX |= 1 << Bit; // set REX.B (Bit=0) and REX.X (Bit=1)
Bit++;
}
@@ -648,13 +683,13 @@ static unsigned DetermineREXPrefix(const MCInst &MI, uint64_t TSFlags,
unsigned e = (isTwoAddr ? X86::AddrNumOperands+1 : X86::AddrNumOperands);
i = isTwoAddr ? 1 : 0;
if (NumOps > e && MI.getOperand(e).isReg() &&
- X86InstrInfo::isX86_64ExtendedReg(MI.getOperand(e).getReg()))
+ X86II::isX86_64ExtendedReg(MI.getOperand(e).getReg()))
REX |= 1 << 2; // set REX.R
unsigned Bit = 0;
for (; i != e; ++i) {
const MCOperand &MO = MI.getOperand(i);
if (MO.isReg()) {
- if (X86InstrInfo::isX86_64ExtendedReg(MO.getReg()))
+ if (X86II::isX86_64ExtendedReg(MO.getReg()))
REX |= 1 << Bit; // REX.B (Bit=0) and REX.X (Bit=1)
Bit++;
}
@@ -663,12 +698,12 @@ static unsigned DetermineREXPrefix(const MCInst &MI, uint64_t TSFlags,
}
default:
if (MI.getOperand(0).isReg() &&
- X86InstrInfo::isX86_64ExtendedReg(MI.getOperand(0).getReg()))
+ X86II::isX86_64ExtendedReg(MI.getOperand(0).getReg()))
REX |= 1 << 0; // set REX.B
i = isTwoAddr ? 2 : 1;
for (unsigned e = NumOps; i != e; ++i) {
const MCOperand &MO = MI.getOperand(i);
- if (MO.isReg() && X86InstrInfo::isX86_64ExtendedReg(MO.getReg()))
+ if (MO.isReg() && X86II::isX86_64ExtendedReg(MO.getReg()))
REX |= 1 << 2; // set REX.R
}
break;
@@ -731,7 +766,7 @@ void X86MCCodeEmitter::EmitOpcodePrefix(uint64_t TSFlags, unsigned &CurByte,
if ((TSFlags & X86II::AdSize) ||
(MemOperand != -1 && is64BitMode() && Is32BitMemOperand(MI, MemOperand)))
EmitByte(0x67, CurByte, OS);
-
+
// Emit the operand size opcode prefix as needed.
if (TSFlags & X86II::OpSize)
EmitByte(0x66, CurByte, OS);
@@ -834,7 +869,6 @@ EncodeInstruction(const MCInst &MI, raw_ostream &OS,
if ((TSFlags >> X86II::VEXShift) & X86II::VEX_4V)
HasVEX_4V = true;
-
// Determine where the memory operand starts, if present.
int MemoryOperand = X86II::getMemoryOperandNo(TSFlags);
if (MemoryOperand != -1) MemoryOperand += CurOp;
@@ -844,12 +878,11 @@ EncodeInstruction(const MCInst &MI, raw_ostream &OS,
else
EmitVEXOpcodePrefix(TSFlags, CurByte, MemoryOperand, MI, Desc, OS);
-
unsigned char BaseOpcode = X86II::getBaseOpcodeFor(TSFlags);
-
+
if ((TSFlags >> X86II::VEXShift) & X86II::Has3DNow0F0FOpcode)
BaseOpcode = 0x0F; // Weird 3DNow! encoding.
-
+
unsigned SrcRegNum = 0;
switch (TSFlags & X86II::FormMask) {
case X86II::MRMInitReg:
@@ -861,7 +894,6 @@ EncodeInstruction(const MCInst &MI, raw_ostream &OS,
case X86II::RawFrm:
EmitByte(BaseOpcode, CurByte, OS);
break;
-
case X86II::RawFrmImm8:
EmitByte(BaseOpcode, CurByte, OS);
EmitImmediate(MI.getOperand(CurOp++),
@@ -1006,8 +1038,7 @@ EncodeInstruction(const MCInst &MI, raw_ostream &OS,
// in bits[7:4] of a immediate byte, and bits[3:0] are ignored.
if ((TSFlags >> X86II::VEXShift) & X86II::VEX_I8IMM) {
const MCOperand &MO = MI.getOperand(CurOp++);
- bool IsExtReg =
- X86InstrInfo::isX86_64ExtendedReg(MO.getReg());
+ bool IsExtReg = X86II::isX86_64ExtendedReg(MO.getReg());
unsigned RegNum = (IsExtReg ? (1 << 7) : 0);
RegNum |= GetX86RegNum(MO) << 4;
EmitImmediate(MCOperand::CreateImm(RegNum), 1, FK_Data_1, CurByte, OS,
@@ -1030,7 +1061,6 @@ EncodeInstruction(const MCInst &MI, raw_ostream &OS,
if ((TSFlags >> X86II::VEXShift) & X86II::Has3DNow0F0FOpcode)
EmitByte(X86II::getBaseOpcodeFor(TSFlags), CurByte, OS);
-
#ifndef NDEBUG
// FIXME: Verify.
diff --git a/contrib/llvm/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.cpp b/contrib/llvm/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.cpp
index b77f37b..f98d5e3 100644
--- a/contrib/llvm/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.cpp
+++ b/contrib/llvm/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.cpp
@@ -13,12 +13,18 @@
#include "X86MCTargetDesc.h"
#include "X86MCAsmInfo.h"
+#include "InstPrinter/X86ATTInstPrinter.h"
+#include "InstPrinter/X86IntelInstPrinter.h"
+#include "llvm/MC/MachineLocation.h"
+#include "llvm/MC/MCCodeGenInfo.h"
+#include "llvm/MC/MCInstrAnalysis.h"
#include "llvm/MC/MCInstrInfo.h"
#include "llvm/MC/MCRegisterInfo.h"
+#include "llvm/MC/MCStreamer.h"
#include "llvm/MC/MCSubtargetInfo.h"
-#include "llvm/Target/TargetRegistry.h"
#include "llvm/ADT/Triple.h"
#include "llvm/Support/Host.h"
+#include "llvm/Support/TargetRegistry.h"
#define GET_REGINFO_MC_DESC
#include "X86GenRegisterInfo.inc"
@@ -34,9 +40,16 @@ using namespace llvm;
std::string X86_MC::ParseX86Triple(StringRef TT) {
Triple TheTriple(TT);
+ std::string FS;
if (TheTriple.getArch() == Triple::x86_64)
- return "+64bit-mode";
- return "-64bit-mode";
+ FS = "+64bit-mode";
+ else
+ FS = "-64bit-mode";
+ if (TheTriple.getOS() == Triple::NativeClient)
+ FS += ",+nacl-mode";
+ else
+ FS += ",-nacl-mode";
+ return FS;
}
/// GetCpuIDAndInfo - Execute the specified cpuid and return the 4 values in the
@@ -107,6 +120,135 @@ void X86_MC::DetectFamilyModel(unsigned EAX, unsigned &Family,
}
}
+unsigned X86_MC::getDwarfRegFlavour(StringRef TT, bool isEH) {
+ Triple TheTriple(TT);
+ if (TheTriple.getArch() == Triple::x86_64)
+ return DWARFFlavour::X86_64;
+
+ if (TheTriple.isOSDarwin())
+ return isEH ? DWARFFlavour::X86_32_DarwinEH : DWARFFlavour::X86_32_Generic;
+ if (TheTriple.getOS() == Triple::MinGW32 ||
+ TheTriple.getOS() == Triple::Cygwin)
+ // Unsupported by now, just quick fallback
+ return DWARFFlavour::X86_32_Generic;
+ return DWARFFlavour::X86_32_Generic;
+}
+
+/// getX86RegNum - This function maps LLVM register identifiers to their X86
+/// specific numbering, which is used in various places encoding instructions.
+unsigned X86_MC::getX86RegNum(unsigned RegNo) {
+ switch(RegNo) {
+ case X86::RAX: case X86::EAX: case X86::AX: case X86::AL: return N86::EAX;
+ case X86::RCX: case X86::ECX: case X86::CX: case X86::CL: return N86::ECX;
+ case X86::RDX: case X86::EDX: case X86::DX: case X86::DL: return N86::EDX;
+ case X86::RBX: case X86::EBX: case X86::BX: case X86::BL: return N86::EBX;
+ case X86::RSP: case X86::ESP: case X86::SP: case X86::SPL: case X86::AH:
+ return N86::ESP;
+ case X86::RBP: case X86::EBP: case X86::BP: case X86::BPL: case X86::CH:
+ return N86::EBP;
+ case X86::RSI: case X86::ESI: case X86::SI: case X86::SIL: case X86::DH:
+ return N86::ESI;
+ case X86::RDI: case X86::EDI: case X86::DI: case X86::DIL: case X86::BH:
+ return N86::EDI;
+
+ case X86::R8: case X86::R8D: case X86::R8W: case X86::R8B:
+ return N86::EAX;
+ case X86::R9: case X86::R9D: case X86::R9W: case X86::R9B:
+ return N86::ECX;
+ case X86::R10: case X86::R10D: case X86::R10W: case X86::R10B:
+ return N86::EDX;
+ case X86::R11: case X86::R11D: case X86::R11W: case X86::R11B:
+ return N86::EBX;
+ case X86::R12: case X86::R12D: case X86::R12W: case X86::R12B:
+ return N86::ESP;
+ case X86::R13: case X86::R13D: case X86::R13W: case X86::R13B:
+ return N86::EBP;
+ case X86::R14: case X86::R14D: case X86::R14W: case X86::R14B:
+ return N86::ESI;
+ case X86::R15: case X86::R15D: case X86::R15W: case X86::R15B:
+ return N86::EDI;
+
+ case X86::ST0: case X86::ST1: case X86::ST2: case X86::ST3:
+ case X86::ST4: case X86::ST5: case X86::ST6: case X86::ST7:
+ return RegNo-X86::ST0;
+
+ case X86::XMM0: case X86::XMM8:
+ case X86::YMM0: case X86::YMM8: case X86::MM0:
+ return 0;
+ case X86::XMM1: case X86::XMM9:
+ case X86::YMM1: case X86::YMM9: case X86::MM1:
+ return 1;
+ case X86::XMM2: case X86::XMM10:
+ case X86::YMM2: case X86::YMM10: case X86::MM2:
+ return 2;
+ case X86::XMM3: case X86::XMM11:
+ case X86::YMM3: case X86::YMM11: case X86::MM3:
+ return 3;
+ case X86::XMM4: case X86::XMM12:
+ case X86::YMM4: case X86::YMM12: case X86::MM4:
+ return 4;
+ case X86::XMM5: case X86::XMM13:
+ case X86::YMM5: case X86::YMM13: case X86::MM5:
+ return 5;
+ case X86::XMM6: case X86::XMM14:
+ case X86::YMM6: case X86::YMM14: case X86::MM6:
+ return 6;
+ case X86::XMM7: case X86::XMM15:
+ case X86::YMM7: case X86::YMM15: case X86::MM7:
+ return 7;
+
+ case X86::ES: return 0;
+ case X86::CS: return 1;
+ case X86::SS: return 2;
+ case X86::DS: return 3;
+ case X86::FS: return 4;
+ case X86::GS: return 5;
+
+ case X86::CR0: case X86::CR8 : case X86::DR0: return 0;
+ case X86::CR1: case X86::CR9 : case X86::DR1: return 1;
+ case X86::CR2: case X86::CR10: case X86::DR2: return 2;
+ case X86::CR3: case X86::CR11: case X86::DR3: return 3;
+ case X86::CR4: case X86::CR12: case X86::DR4: return 4;
+ case X86::CR5: case X86::CR13: case X86::DR5: return 5;
+ case X86::CR6: case X86::CR14: case X86::DR6: return 6;
+ case X86::CR7: case X86::CR15: case X86::DR7: return 7;
+
+ // Pseudo index registers are equivalent to a "none"
+ // scaled index (See Intel Manual 2A, table 2-3)
+ case X86::EIZ:
+ case X86::RIZ:
+ return 4;
+
+ default:
+ assert((int(RegNo) > 0) && "Unknown physical register!");
+ return 0;
+ }
+}
+
+void X86_MC::InitLLVM2SEHRegisterMapping(MCRegisterInfo *MRI) {
+ // FIXME: TableGen these.
+ for (unsigned Reg = X86::NoRegister+1; Reg < X86::NUM_TARGET_REGS; ++Reg) {
+ int SEH = X86_MC::getX86RegNum(Reg);
+ switch (Reg) {
+ case X86::R8: case X86::R8D: case X86::R8W: case X86::R8B:
+ case X86::R9: case X86::R9D: case X86::R9W: case X86::R9B:
+ case X86::R10: case X86::R10D: case X86::R10W: case X86::R10B:
+ case X86::R11: case X86::R11D: case X86::R11W: case X86::R11B:
+ case X86::R12: case X86::R12D: case X86::R12W: case X86::R12B:
+ case X86::R13: case X86::R13D: case X86::R13W: case X86::R13B:
+ case X86::R14: case X86::R14D: case X86::R14W: case X86::R14B:
+ case X86::R15: case X86::R15D: case X86::R15W: case X86::R15B:
+ case X86::XMM8: case X86::XMM9: case X86::XMM10: case X86::XMM11:
+ case X86::XMM12: case X86::XMM13: case X86::XMM14: case X86::XMM15:
+ case X86::YMM8: case X86::YMM9: case X86::YMM10: case X86::YMM11:
+ case X86::YMM12: case X86::YMM13: case X86::YMM14: case X86::YMM15:
+ SEH += 8;
+ break;
+ }
+ MRI->mapLLVMRegToSEHReg(Reg, SEH);
+ }
+}
+
MCSubtargetInfo *X86_MC::createX86MCSubtargetInfo(StringRef TT, StringRef CPU,
StringRef FS) {
std::string ArchFS = X86_MC::ParseX86Triple(TT);
@@ -131,55 +273,191 @@ MCSubtargetInfo *X86_MC::createX86MCSubtargetInfo(StringRef TT, StringRef CPU,
return X;
}
-// Force static initialization.
-extern "C" void LLVMInitializeX86MCSubtargetInfo() {
- TargetRegistry::RegisterMCSubtargetInfo(TheX86_32Target,
- X86_MC::createX86MCSubtargetInfo);
- TargetRegistry::RegisterMCSubtargetInfo(TheX86_64Target,
- X86_MC::createX86MCSubtargetInfo);
-}
-
static MCInstrInfo *createX86MCInstrInfo() {
MCInstrInfo *X = new MCInstrInfo();
InitX86MCInstrInfo(X);
return X;
}
-extern "C" void LLVMInitializeX86MCInstrInfo() {
- TargetRegistry::RegisterMCInstrInfo(TheX86_32Target, createX86MCInstrInfo);
- TargetRegistry::RegisterMCInstrInfo(TheX86_64Target, createX86MCInstrInfo);
-}
+static MCRegisterInfo *createX86MCRegisterInfo(StringRef TT) {
+ Triple TheTriple(TT);
+ unsigned RA = (TheTriple.getArch() == Triple::x86_64)
+ ? X86::RIP // Should have dwarf #16.
+ : X86::EIP; // Should have dwarf #8.
-static MCRegisterInfo *createX86MCRegisterInfo() {
MCRegisterInfo *X = new MCRegisterInfo();
- InitX86MCRegisterInfo(X);
+ InitX86MCRegisterInfo(X, RA,
+ X86_MC::getDwarfRegFlavour(TT, false),
+ X86_MC::getDwarfRegFlavour(TT, true));
+ X86_MC::InitLLVM2SEHRegisterMapping(X);
return X;
}
-extern "C" void LLVMInitializeX86MCRegInfo() {
- TargetRegistry::RegisterMCRegInfo(TheX86_32Target, createX86MCRegisterInfo);
- TargetRegistry::RegisterMCRegInfo(TheX86_64Target, createX86MCRegisterInfo);
-}
-
-
static MCAsmInfo *createX86MCAsmInfo(const Target &T, StringRef TT) {
Triple TheTriple(TT);
+ bool is64Bit = TheTriple.getArch() == Triple::x86_64;
+ MCAsmInfo *MAI;
if (TheTriple.isOSDarwin() || TheTriple.getEnvironment() == Triple::MachO) {
- if (TheTriple.getArch() == Triple::x86_64)
- return new X86_64MCAsmInfoDarwin(TheTriple);
+ if (is64Bit)
+ MAI = new X86_64MCAsmInfoDarwin(TheTriple);
else
- return new X86MCAsmInfoDarwin(TheTriple);
+ MAI = new X86MCAsmInfoDarwin(TheTriple);
+ } else if (TheTriple.isOSWindows()) {
+ MAI = new X86MCAsmInfoCOFF(TheTriple);
+ } else {
+ MAI = new X86ELFMCAsmInfo(TheTriple);
}
+ // Initialize initial frame state.
+ // Calculate amount of bytes used for return address storing
+ int stackGrowth = is64Bit ? -8 : -4;
+
+ // Initial state of the frame pointer is esp+stackGrowth.
+ MachineLocation Dst(MachineLocation::VirtualFP);
+ MachineLocation Src(is64Bit ? X86::RSP : X86::ESP, stackGrowth);
+ MAI->addInitialFrameState(0, Dst, Src);
+
+ // Add return address to move list
+ MachineLocation CSDst(is64Bit ? X86::RSP : X86::ESP, stackGrowth);
+ MachineLocation CSSrc(is64Bit ? X86::RIP : X86::EIP);
+ MAI->addInitialFrameState(0, CSDst, CSSrc);
+
+ return MAI;
+}
+
+static MCCodeGenInfo *createX86MCCodeGenInfo(StringRef TT, Reloc::Model RM,
+ CodeModel::Model CM) {
+ MCCodeGenInfo *X = new MCCodeGenInfo();
+
+ Triple T(TT);
+ bool is64Bit = T.getArch() == Triple::x86_64;
+
+ if (RM == Reloc::Default) {
+ // Darwin defaults to PIC in 64 bit mode and dynamic-no-pic in 32 bit mode.
+ // Win64 requires rip-rel addressing, thus we force it to PIC. Otherwise we
+ // use static relocation model by default.
+ if (T.isOSDarwin()) {
+ if (is64Bit)
+ RM = Reloc::PIC_;
+ else
+ RM = Reloc::DynamicNoPIC;
+ } else if (T.isOSWindows() && is64Bit)
+ RM = Reloc::PIC_;
+ else
+ RM = Reloc::Static;
+ }
+
+ // ELF and X86-64 don't have a distinct DynamicNoPIC model. DynamicNoPIC
+ // is defined as a model for code which may be used in static or dynamic
+ // executables but not necessarily a shared library. On X86-32 we just
+ // compile in -static mode, in x86-64 we use PIC.
+ if (RM == Reloc::DynamicNoPIC) {
+ if (is64Bit)
+ RM = Reloc::PIC_;
+ else if (!T.isOSDarwin())
+ RM = Reloc::Static;
+ }
+
+ // If we are on Darwin, disallow static relocation model in X86-64 mode, since
+ // the Mach-O file format doesn't support it.
+ if (RM == Reloc::Static && T.isOSDarwin() && is64Bit)
+ RM = Reloc::PIC_;
+
+ // For static codegen, if we're not already set, use Small codegen.
+ if (CM == CodeModel::Default)
+ CM = CodeModel::Small;
+ else if (CM == CodeModel::JITDefault)
+ // 64-bit JIT places everything in the same buffer except external funcs.
+ CM = is64Bit ? CodeModel::Large : CodeModel::Small;
+
+ X->InitMCCodeGenInfo(RM, CM);
+ return X;
+}
+
+static MCStreamer *createMCStreamer(const Target &T, StringRef TT,
+ MCContext &Ctx, MCAsmBackend &MAB,
+ raw_ostream &_OS,
+ MCCodeEmitter *_Emitter,
+ bool RelaxAll,
+ bool NoExecStack) {
+ Triple TheTriple(TT);
+
+ if (TheTriple.isOSDarwin() || TheTriple.getEnvironment() == Triple::MachO)
+ return createMachOStreamer(Ctx, MAB, _OS, _Emitter, RelaxAll);
+
if (TheTriple.isOSWindows())
- return new X86MCAsmInfoCOFF(TheTriple);
+ return createWinCOFFStreamer(Ctx, MAB, *_Emitter, _OS, RelaxAll);
+
+ return createELFStreamer(Ctx, MAB, _OS, _Emitter, RelaxAll, NoExecStack);
+}
+
+static MCInstPrinter *createX86MCInstPrinter(const Target &T,
+ unsigned SyntaxVariant,
+ const MCAsmInfo &MAI,
+ const MCSubtargetInfo &STI) {
+ if (SyntaxVariant == 0)
+ return new X86ATTInstPrinter(MAI);
+ if (SyntaxVariant == 1)
+ return new X86IntelInstPrinter(MAI);
+ return 0;
+}
- return new X86ELFMCAsmInfo(TheTriple);
+static MCInstrAnalysis *createX86MCInstrAnalysis(const MCInstrInfo *Info) {
+ return new MCInstrAnalysis(Info);
}
-extern "C" void LLVMInitializeX86MCAsmInfo() {
- // Register the target asm info.
+// Force static initialization.
+extern "C" void LLVMInitializeX86TargetMC() {
+ // Register the MC asm info.
RegisterMCAsmInfoFn A(TheX86_32Target, createX86MCAsmInfo);
RegisterMCAsmInfoFn B(TheX86_64Target, createX86MCAsmInfo);
+
+ // Register the MC codegen info.
+ RegisterMCCodeGenInfoFn C(TheX86_32Target, createX86MCCodeGenInfo);
+ RegisterMCCodeGenInfoFn D(TheX86_64Target, createX86MCCodeGenInfo);
+
+ // Register the MC instruction info.
+ TargetRegistry::RegisterMCInstrInfo(TheX86_32Target, createX86MCInstrInfo);
+ TargetRegistry::RegisterMCInstrInfo(TheX86_64Target, createX86MCInstrInfo);
+
+ // Register the MC register info.
+ TargetRegistry::RegisterMCRegInfo(TheX86_32Target, createX86MCRegisterInfo);
+ TargetRegistry::RegisterMCRegInfo(TheX86_64Target, createX86MCRegisterInfo);
+
+ // Register the MC subtarget info.
+ TargetRegistry::RegisterMCSubtargetInfo(TheX86_32Target,
+ X86_MC::createX86MCSubtargetInfo);
+ TargetRegistry::RegisterMCSubtargetInfo(TheX86_64Target,
+ X86_MC::createX86MCSubtargetInfo);
+
+ // Register the MC instruction analyzer.
+ TargetRegistry::RegisterMCInstrAnalysis(TheX86_32Target,
+ createX86MCInstrAnalysis);
+ TargetRegistry::RegisterMCInstrAnalysis(TheX86_64Target,
+ createX86MCInstrAnalysis);
+
+ // Register the code emitter.
+ TargetRegistry::RegisterMCCodeEmitter(TheX86_32Target,
+ createX86MCCodeEmitter);
+ TargetRegistry::RegisterMCCodeEmitter(TheX86_64Target,
+ createX86MCCodeEmitter);
+
+ // Register the asm backend.
+ TargetRegistry::RegisterMCAsmBackend(TheX86_32Target,
+ createX86_32AsmBackend);
+ TargetRegistry::RegisterMCAsmBackend(TheX86_64Target,
+ createX86_64AsmBackend);
+
+ // Register the object streamer.
+ TargetRegistry::RegisterMCObjectStreamer(TheX86_32Target,
+ createMCStreamer);
+ TargetRegistry::RegisterMCObjectStreamer(TheX86_64Target,
+ createMCStreamer);
+
+ // Register the MCInstPrinter.
+ TargetRegistry::RegisterMCInstPrinter(TheX86_32Target,
+ createX86MCInstPrinter);
+ TargetRegistry::RegisterMCInstPrinter(TheX86_64Target,
+ createX86MCInstPrinter);
}
diff --git a/contrib/llvm/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.h b/contrib/llvm/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.h
index 89ea22b..c144c51 100644
--- a/contrib/llvm/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.h
+++ b/contrib/llvm/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.h
@@ -14,15 +14,39 @@
#ifndef X86MCTARGETDESC_H
#define X86MCTARGETDESC_H
+#include "llvm/Support/DataTypes.h"
#include <string>
namespace llvm {
+class MCAsmBackend;
+class MCCodeEmitter;
+class MCContext;
+class MCInstrInfo;
+class MCObjectWriter;
+class MCRegisterInfo;
class MCSubtargetInfo;
class Target;
class StringRef;
+class raw_ostream;
extern Target TheX86_32Target, TheX86_64Target;
+/// DWARFFlavour - Flavour of dwarf regnumbers
+///
+namespace DWARFFlavour {
+ enum {
+ X86_64 = 0, X86_32_DarwinEH = 1, X86_32_Generic = 2
+ };
+}
+
+/// N86 namespace - Native X86 register numbers
+///
+namespace N86 {
+ enum {
+ EAX = 0, ECX = 1, EDX = 2, EBX = 3, ESP = 4, EBP = 5, ESI = 6, EDI = 7
+ };
+}
+
namespace X86_MC {
std::string ParseX86Triple(StringRef TT);
@@ -33,13 +57,32 @@ namespace X86_MC {
void DetectFamilyModel(unsigned EAX, unsigned &Family, unsigned &Model);
- /// createARMMCSubtargetInfo - Create a X86 MCSubtargetInfo instance.
+ unsigned getDwarfRegFlavour(StringRef TT, bool isEH);
+
+ unsigned getX86RegNum(unsigned RegNo);
+
+ void InitLLVM2SEHRegisterMapping(MCRegisterInfo *MRI);
+
+ /// createX86MCSubtargetInfo - Create a X86 MCSubtargetInfo instance.
/// This is exposed so Asm parser, etc. do not need to go through
/// TargetRegistry.
MCSubtargetInfo *createX86MCSubtargetInfo(StringRef TT, StringRef CPU,
StringRef FS);
}
+MCCodeEmitter *createX86MCCodeEmitter(const MCInstrInfo &MCII,
+ const MCSubtargetInfo &STI,
+ MCContext &Ctx);
+
+MCAsmBackend *createX86_32AsmBackend(const Target &T, StringRef TT);
+MCAsmBackend *createX86_64AsmBackend(const Target &T, StringRef TT);
+
+/// createX86MachObjectWriter - Construct an X86 Mach-O object writer.
+MCObjectWriter *createX86MachObjectWriter(raw_ostream &OS,
+ bool Is64Bit,
+ uint32_t CPUType,
+ uint32_t CPUSubtype);
+
} // End llvm namespace
diff --git a/contrib/llvm/lib/Target/X86/X86MachObjectWriter.cpp b/contrib/llvm/lib/Target/X86/MCTargetDesc/X86MachObjectWriter.cpp
index 3711038..f0f1982 100644
--- a/contrib/llvm/lib/Target/X86/X86MachObjectWriter.cpp
+++ b/contrib/llvm/lib/Target/X86/MCTargetDesc/X86MachObjectWriter.cpp
@@ -7,14 +7,14 @@
//
//===----------------------------------------------------------------------===//
-#include "X86.h"
-#include "X86FixupKinds.h"
-#include "llvm/ADT/Twine.h"
+#include "MCTargetDesc/X86FixupKinds.h"
+#include "MCTargetDesc/X86MCTargetDesc.h"
#include "llvm/MC/MCAssembler.h"
#include "llvm/MC/MCAsmLayout.h"
#include "llvm/MC/MCMachObjectWriter.h"
#include "llvm/MC/MCSectionMachO.h"
#include "llvm/MC/MCValue.h"
+#include "llvm/ADT/Twine.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Object/MachOFormat.h"
diff --git a/contrib/llvm/lib/Target/X86/SSEDomainFix.cpp b/contrib/llvm/lib/Target/X86/SSEDomainFix.cpp
deleted file mode 100644
index 13680c5..0000000
--- a/contrib/llvm/lib/Target/X86/SSEDomainFix.cpp
+++ /dev/null
@@ -1,506 +0,0 @@
-//===- SSEDomainFix.cpp - Use proper int/float domain for SSE ---*- C++ -*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file contains the SSEDomainFix pass.
-//
-// Some SSE instructions like mov, and, or, xor are available in different
-// variants for different operand types. These variant instructions are
-// equivalent, but on Nehalem and newer cpus there is extra latency
-// transferring data between integer and floating point domains.
-//
-// This pass changes the variant instructions to minimize domain crossings.
-//
-//===----------------------------------------------------------------------===//
-
-#define DEBUG_TYPE "sse-domain-fix"
-#include "X86InstrInfo.h"
-#include "llvm/CodeGen/MachineFunctionPass.h"
-#include "llvm/CodeGen/MachineRegisterInfo.h"
-#include "llvm/ADT/DepthFirstIterator.h"
-#include "llvm/Support/Allocator.h"
-#include "llvm/Support/Debug.h"
-#include "llvm/Support/raw_ostream.h"
-using namespace llvm;
-
-/// A DomainValue is a bit like LiveIntervals' ValNo, but it also keeps track
-/// of execution domains.
-///
-/// An open DomainValue represents a set of instructions that can still switch
-/// execution domain. Multiple registers may refer to the same open
-/// DomainValue - they will eventually be collapsed to the same execution
-/// domain.
-///
-/// A collapsed DomainValue represents a single register that has been forced
-/// into one of more execution domains. There is a separate collapsed
-/// DomainValue for each register, but it may contain multiple execution
-/// domains. A register value is initially created in a single execution
-/// domain, but if we were forced to pay the penalty of a domain crossing, we
-/// keep track of the fact the the register is now available in multiple
-/// domains.
-namespace {
-struct DomainValue {
- // Basic reference counting.
- unsigned Refs;
-
- // Bitmask of available domains. For an open DomainValue, it is the still
- // possible domains for collapsing. For a collapsed DomainValue it is the
- // domains where the register is available for free.
- unsigned AvailableDomains;
-
- // Position of the last defining instruction.
- unsigned Dist;
-
- // Twiddleable instructions using or defining these registers.
- SmallVector<MachineInstr*, 8> Instrs;
-
- // A collapsed DomainValue has no instructions to twiddle - it simply keeps
- // track of the domains where the registers are already available.
- bool isCollapsed() const { return Instrs.empty(); }
-
- // Is domain available?
- bool hasDomain(unsigned domain) const {
- return AvailableDomains & (1u << domain);
- }
-
- // Mark domain as available.
- void addDomain(unsigned domain) {
- AvailableDomains |= 1u << domain;
- }
-
- // Restrict to a single domain available.
- void setSingleDomain(unsigned domain) {
- AvailableDomains = 1u << domain;
- }
-
- // Return bitmask of domains that are available and in mask.
- unsigned getCommonDomains(unsigned mask) const {
- return AvailableDomains & mask;
- }
-
- // First domain available.
- unsigned getFirstDomain() const {
- return CountTrailingZeros_32(AvailableDomains);
- }
-
- DomainValue() { clear(); }
-
- void clear() {
- Refs = AvailableDomains = Dist = 0;
- Instrs.clear();
- }
-};
-}
-
-static const unsigned NumRegs = 16;
-
-namespace {
-class SSEDomainFixPass : public MachineFunctionPass {
- static char ID;
- SpecificBumpPtrAllocator<DomainValue> Allocator;
- SmallVector<DomainValue*,16> Avail;
-
- MachineFunction *MF;
- const X86InstrInfo *TII;
- const TargetRegisterInfo *TRI;
- MachineBasicBlock *MBB;
- DomainValue **LiveRegs;
- typedef DenseMap<MachineBasicBlock*,DomainValue**> LiveOutMap;
- LiveOutMap LiveOuts;
- unsigned Distance;
-
-public:
- SSEDomainFixPass() : MachineFunctionPass(ID) {}
-
- virtual void getAnalysisUsage(AnalysisUsage &AU) const {
- AU.setPreservesAll();
- MachineFunctionPass::getAnalysisUsage(AU);
- }
-
- virtual bool runOnMachineFunction(MachineFunction &MF);
-
- virtual const char *getPassName() const {
- return "SSE execution domain fixup";
- }
-
-private:
- // Register mapping.
- int RegIndex(unsigned Reg);
-
- // DomainValue allocation.
- DomainValue *Alloc(int domain = -1);
- void Recycle(DomainValue*);
-
- // LiveRegs manipulations.
- void SetLiveReg(int rx, DomainValue *DV);
- void Kill(int rx);
- void Force(int rx, unsigned domain);
- void Collapse(DomainValue *dv, unsigned domain);
- bool Merge(DomainValue *A, DomainValue *B);
-
- void enterBasicBlock();
- void visitGenericInstr(MachineInstr*);
- void visitSoftInstr(MachineInstr*, unsigned mask);
- void visitHardInstr(MachineInstr*, unsigned domain);
-};
-}
-
-char SSEDomainFixPass::ID = 0;
-
-/// Translate TRI register number to an index into our smaller tables of
-/// interesting registers. Return -1 for boring registers.
-int SSEDomainFixPass::RegIndex(unsigned reg) {
- assert(X86::XMM15 == X86::XMM0+NumRegs-1 && "Unexpected sort");
- reg -= X86::XMM0;
- return reg < NumRegs ? (int) reg : -1;
-}
-
-DomainValue *SSEDomainFixPass::Alloc(int domain) {
- DomainValue *dv = Avail.empty() ?
- new(Allocator.Allocate()) DomainValue :
- Avail.pop_back_val();
- dv->Dist = Distance;
- if (domain >= 0)
- dv->addDomain(domain);
- return dv;
-}
-
-void SSEDomainFixPass::Recycle(DomainValue *dv) {
- assert(dv && "Cannot recycle NULL");
- dv->clear();
- Avail.push_back(dv);
-}
-
-/// Set LiveRegs[rx] = dv, updating reference counts.
-void SSEDomainFixPass::SetLiveReg(int rx, DomainValue *dv) {
- assert(unsigned(rx) < NumRegs && "Invalid index");
- if (!LiveRegs) {
- LiveRegs = new DomainValue*[NumRegs];
- std::fill(LiveRegs, LiveRegs+NumRegs, (DomainValue*)0);
- }
-
- if (LiveRegs[rx] == dv)
- return;
- if (LiveRegs[rx]) {
- assert(LiveRegs[rx]->Refs && "Bad refcount");
- if (--LiveRegs[rx]->Refs == 0) Recycle(LiveRegs[rx]);
- }
- LiveRegs[rx] = dv;
- if (dv) ++dv->Refs;
-}
-
-// Kill register rx, recycle or collapse any DomainValue.
-void SSEDomainFixPass::Kill(int rx) {
- assert(unsigned(rx) < NumRegs && "Invalid index");
- if (!LiveRegs || !LiveRegs[rx]) return;
-
- // Before killing the last reference to an open DomainValue, collapse it to
- // the first available domain.
- if (LiveRegs[rx]->Refs == 1 && !LiveRegs[rx]->isCollapsed())
- Collapse(LiveRegs[rx], LiveRegs[rx]->getFirstDomain());
- else
- SetLiveReg(rx, 0);
-}
-
-/// Force register rx into domain.
-void SSEDomainFixPass::Force(int rx, unsigned domain) {
- assert(unsigned(rx) < NumRegs && "Invalid index");
- DomainValue *dv;
- if (LiveRegs && (dv = LiveRegs[rx])) {
- if (dv->isCollapsed())
- dv->addDomain(domain);
- else if (dv->hasDomain(domain))
- Collapse(dv, domain);
- else {
- // This is an incompatible open DomainValue. Collapse it to whatever and force
- // the new value into domain. This costs a domain crossing.
- Collapse(dv, dv->getFirstDomain());
- assert(LiveRegs[rx] && "Not live after collapse?");
- LiveRegs[rx]->addDomain(domain);
- }
- } else {
- // Set up basic collapsed DomainValue.
- SetLiveReg(rx, Alloc(domain));
- }
-}
-
-/// Collapse open DomainValue into given domain. If there are multiple
-/// registers using dv, they each get a unique collapsed DomainValue.
-void SSEDomainFixPass::Collapse(DomainValue *dv, unsigned domain) {
- assert(dv->hasDomain(domain) && "Cannot collapse");
-
- // Collapse all the instructions.
- while (!dv->Instrs.empty())
- TII->SetSSEDomain(dv->Instrs.pop_back_val(), domain);
- dv->setSingleDomain(domain);
-
- // If there are multiple users, give them new, unique DomainValues.
- if (LiveRegs && dv->Refs > 1)
- for (unsigned rx = 0; rx != NumRegs; ++rx)
- if (LiveRegs[rx] == dv)
- SetLiveReg(rx, Alloc(domain));
-}
-
-/// Merge - All instructions and registers in B are moved to A, and B is
-/// released.
-bool SSEDomainFixPass::Merge(DomainValue *A, DomainValue *B) {
- assert(!A->isCollapsed() && "Cannot merge into collapsed");
- assert(!B->isCollapsed() && "Cannot merge from collapsed");
- if (A == B)
- return true;
- // Restrict to the domains that A and B have in common.
- unsigned common = A->getCommonDomains(B->AvailableDomains);
- if (!common)
- return false;
- A->AvailableDomains = common;
- A->Dist = std::max(A->Dist, B->Dist);
- A->Instrs.append(B->Instrs.begin(), B->Instrs.end());
- for (unsigned rx = 0; rx != NumRegs; ++rx)
- if (LiveRegs[rx] == B)
- SetLiveReg(rx, A);
- return true;
-}
-
-void SSEDomainFixPass::enterBasicBlock() {
- // Try to coalesce live-out registers from predecessors.
- for (MachineBasicBlock::livein_iterator i = MBB->livein_begin(),
- e = MBB->livein_end(); i != e; ++i) {
- int rx = RegIndex(*i);
- if (rx < 0) continue;
- for (MachineBasicBlock::const_pred_iterator pi = MBB->pred_begin(),
- pe = MBB->pred_end(); pi != pe; ++pi) {
- LiveOutMap::const_iterator fi = LiveOuts.find(*pi);
- if (fi == LiveOuts.end()) continue;
- DomainValue *pdv = fi->second[rx];
- if (!pdv) continue;
- if (!LiveRegs || !LiveRegs[rx]) {
- SetLiveReg(rx, pdv);
- continue;
- }
-
- // We have a live DomainValue from more than one predecessor.
- if (LiveRegs[rx]->isCollapsed()) {
- // We are already collapsed, but predecessor is not. Force him.
- unsigned domain = LiveRegs[rx]->getFirstDomain();
- if (!pdv->isCollapsed() && pdv->hasDomain(domain))
- Collapse(pdv, domain);
- continue;
- }
-
- // Currently open, merge in predecessor.
- if (!pdv->isCollapsed())
- Merge(LiveRegs[rx], pdv);
- else
- Force(rx, pdv->getFirstDomain());
- }
- }
-}
-
-// A hard instruction only works in one domain. All input registers will be
-// forced into that domain.
-void SSEDomainFixPass::visitHardInstr(MachineInstr *mi, unsigned domain) {
- // Collapse all uses.
- for (unsigned i = mi->getDesc().getNumDefs(),
- e = mi->getDesc().getNumOperands(); i != e; ++i) {
- MachineOperand &mo = mi->getOperand(i);
- if (!mo.isReg()) continue;
- int rx = RegIndex(mo.getReg());
- if (rx < 0) continue;
- Force(rx, domain);
- }
-
- // Kill all defs and force them.
- for (unsigned i = 0, e = mi->getDesc().getNumDefs(); i != e; ++i) {
- MachineOperand &mo = mi->getOperand(i);
- if (!mo.isReg()) continue;
- int rx = RegIndex(mo.getReg());
- if (rx < 0) continue;
- Kill(rx);
- Force(rx, domain);
- }
-}
-
-// A soft instruction can be changed to work in other domains given by mask.
-void SSEDomainFixPass::visitSoftInstr(MachineInstr *mi, unsigned mask) {
- // Bitmask of available domains for this instruction after taking collapsed
- // operands into account.
- unsigned available = mask;
-
- // Scan the explicit use operands for incoming domains.
- SmallVector<int, 4> used;
- if (LiveRegs)
- for (unsigned i = mi->getDesc().getNumDefs(),
- e = mi->getDesc().getNumOperands(); i != e; ++i) {
- MachineOperand &mo = mi->getOperand(i);
- if (!mo.isReg()) continue;
- int rx = RegIndex(mo.getReg());
- if (rx < 0) continue;
- if (DomainValue *dv = LiveRegs[rx]) {
- // Bitmask of domains that dv and available have in common.
- unsigned common = dv->getCommonDomains(available);
- // Is it possible to use this collapsed register for free?
- if (dv->isCollapsed()) {
- // Restrict available domains to the ones in common with the operand.
- // If there are no common domains, we must pay the cross-domain
- // penalty for this operand.
- if (common) available = common;
- } else if (common)
- // Open DomainValue is compatible, save it for merging.
- used.push_back(rx);
- else
- // Open DomainValue is not compatible with instruction. It is useless
- // now.
- Kill(rx);
- }
- }
-
- // If the collapsed operands force a single domain, propagate the collapse.
- if (isPowerOf2_32(available)) {
- unsigned domain = CountTrailingZeros_32(available);
- TII->SetSSEDomain(mi, domain);
- visitHardInstr(mi, domain);
- return;
- }
-
- // Kill off any remaining uses that don't match available, and build a list of
- // incoming DomainValues that we want to merge.
- SmallVector<DomainValue*,4> doms;
- for (SmallVector<int, 4>::iterator i=used.begin(), e=used.end(); i!=e; ++i) {
- int rx = *i;
- DomainValue *dv = LiveRegs[rx];
- // This useless DomainValue could have been missed above.
- if (!dv->getCommonDomains(available)) {
- Kill(*i);
- continue;
- }
- // sorted, uniqued insert.
- bool inserted = false;
- for (SmallVector<DomainValue*,4>::iterator i = doms.begin(), e = doms.end();
- i != e && !inserted; ++i) {
- if (dv == *i)
- inserted = true;
- else if (dv->Dist < (*i)->Dist) {
- inserted = true;
- doms.insert(i, dv);
- }
- }
- if (!inserted)
- doms.push_back(dv);
- }
-
- // doms are now sorted in order of appearance. Try to merge them all, giving
- // priority to the latest ones.
- DomainValue *dv = 0;
- while (!doms.empty()) {
- if (!dv) {
- dv = doms.pop_back_val();
- continue;
- }
-
- DomainValue *latest = doms.pop_back_val();
- if (Merge(dv, latest)) continue;
-
- // If latest didn't merge, it is useless now. Kill all registers using it.
- for (SmallVector<int,4>::iterator i=used.begin(), e=used.end(); i != e; ++i)
- if (LiveRegs[*i] == latest)
- Kill(*i);
- }
-
- // dv is the DomainValue we are going to use for this instruction.
- if (!dv)
- dv = Alloc();
- dv->Dist = Distance;
- dv->AvailableDomains = available;
- dv->Instrs.push_back(mi);
-
- // Finally set all defs and non-collapsed uses to dv.
- for (unsigned i = 0, e = mi->getDesc().getNumOperands(); i != e; ++i) {
- MachineOperand &mo = mi->getOperand(i);
- if (!mo.isReg()) continue;
- int rx = RegIndex(mo.getReg());
- if (rx < 0) continue;
- if (!LiveRegs || !LiveRegs[rx] || (mo.isDef() && LiveRegs[rx]!=dv)) {
- Kill(rx);
- SetLiveReg(rx, dv);
- }
- }
-}
-
-void SSEDomainFixPass::visitGenericInstr(MachineInstr *mi) {
- // Process explicit defs, kill any XMM registers redefined.
- for (unsigned i = 0, e = mi->getDesc().getNumDefs(); i != e; ++i) {
- MachineOperand &mo = mi->getOperand(i);
- if (!mo.isReg()) continue;
- int rx = RegIndex(mo.getReg());
- if (rx < 0) continue;
- Kill(rx);
- }
-}
-
-bool SSEDomainFixPass::runOnMachineFunction(MachineFunction &mf) {
- MF = &mf;
- TII = static_cast<const X86InstrInfo*>(MF->getTarget().getInstrInfo());
- TRI = MF->getTarget().getRegisterInfo();
- MBB = 0;
- LiveRegs = 0;
- Distance = 0;
- assert(NumRegs == X86::VR128RegClass.getNumRegs() && "Bad regclass");
-
- // If no XMM registers are used in the function, we can skip it completely.
- bool anyregs = false;
- for (TargetRegisterClass::const_iterator I = X86::VR128RegClass.begin(),
- E = X86::VR128RegClass.end(); I != E; ++I)
- if (MF->getRegInfo().isPhysRegUsed(*I)) {
- anyregs = true;
- break;
- }
- if (!anyregs) return false;
-
- MachineBasicBlock *Entry = MF->begin();
- SmallPtrSet<MachineBasicBlock*, 16> Visited;
- for (df_ext_iterator<MachineBasicBlock*, SmallPtrSet<MachineBasicBlock*, 16> >
- DFI = df_ext_begin(Entry, Visited), DFE = df_ext_end(Entry, Visited);
- DFI != DFE; ++DFI) {
- MBB = *DFI;
- enterBasicBlock();
- for (MachineBasicBlock::iterator I = MBB->begin(), E = MBB->end(); I != E;
- ++I) {
- MachineInstr *mi = I;
- if (mi->isDebugValue()) continue;
- ++Distance;
- std::pair<uint16_t, uint16_t> domp = TII->GetSSEDomain(mi);
- if (domp.first)
- if (domp.second)
- visitSoftInstr(mi, domp.second);
- else
- visitHardInstr(mi, domp.first);
- else if (LiveRegs)
- visitGenericInstr(mi);
- }
-
- // Save live registers at end of MBB - used by enterBasicBlock().
- if (LiveRegs)
- LiveOuts.insert(std::make_pair(MBB, LiveRegs));
- LiveRegs = 0;
- }
-
- // Clear the LiveOuts vectors. Should we also collapse any remaining
- // DomainValues?
- for (LiveOutMap::const_iterator i = LiveOuts.begin(), e = LiveOuts.end();
- i != e; ++i)
- delete[] i->second;
- LiveOuts.clear();
- Avail.clear();
- Allocator.DestroyAll();
-
- return false;
-}
-
-FunctionPass *llvm::createSSEDomainFixPass() {
- return new SSEDomainFixPass();
-}
diff --git a/contrib/llvm/lib/Target/X86/TargetInfo/X86TargetInfo.cpp b/contrib/llvm/lib/Target/X86/TargetInfo/X86TargetInfo.cpp
index 08d4d84..52a67f7 100644
--- a/contrib/llvm/lib/Target/X86/TargetInfo/X86TargetInfo.cpp
+++ b/contrib/llvm/lib/Target/X86/TargetInfo/X86TargetInfo.cpp
@@ -9,7 +9,7 @@
#include "X86.h"
#include "llvm/Module.h"
-#include "llvm/Target/TargetRegistry.h"
+#include "llvm/Support/TargetRegistry.h"
using namespace llvm;
Target llvm::TheX86_32Target, llvm::TheX86_64Target;
diff --git a/contrib/llvm/lib/Target/X86/Utils/CMakeLists.txt b/contrib/llvm/lib/Target/X86/Utils/CMakeLists.txt
deleted file mode 100644
index 3ad5f99..0000000
--- a/contrib/llvm/lib/Target/X86/Utils/CMakeLists.txt
+++ /dev/null
@@ -1,6 +0,0 @@
-include_directories( ${CMAKE_CURRENT_BINARY_DIR}/.. ${CMAKE_CURRENT_SOURCE_DIR}/.. )
-
-add_llvm_library(LLVMX86Utils
- X86ShuffleDecode.cpp
- )
-add_dependencies(LLVMX86Utils X86CodeGenTable_gen)
diff --git a/contrib/llvm/lib/Target/X86/Utils/Makefile b/contrib/llvm/lib/Target/X86/Utils/Makefile
deleted file mode 100644
index 1df6f0f..0000000
--- a/contrib/llvm/lib/Target/X86/Utils/Makefile
+++ /dev/null
@@ -1,15 +0,0 @@
-##===- lib/Target/X86/Utils/Makefile -----------------------*- Makefile -*-===##
-#
-# The LLVM Compiler Infrastructure
-#
-# This file is distributed under the University of Illinois Open Source
-# License. See LICENSE.TXT for details.
-#
-##===----------------------------------------------------------------------===##
-LEVEL = ../../../..
-LIBRARYNAME = LLVMX86Utils
-
-# Hack: we need to include 'main' x86 target directory to grab private headers
-CPP.Flags += -I$(PROJ_OBJ_DIR)/.. -I$(PROJ_SRC_DIR)/..
-
-include $(LEVEL)/Makefile.common
diff --git a/contrib/llvm/lib/Target/X86/Utils/X86ShuffleDecode.cpp b/contrib/llvm/lib/Target/X86/Utils/X86ShuffleDecode.cpp
index cd06060..aeb3309 100644
--- a/contrib/llvm/lib/Target/X86/Utils/X86ShuffleDecode.cpp
+++ b/contrib/llvm/lib/Target/X86/Utils/X86ShuffleDecode.cpp
@@ -167,24 +167,77 @@ void DecodeUNPCKLPMask(EVT VT,
SmallVectorImpl<unsigned> &ShuffleMask) {
unsigned NumElts = VT.getVectorNumElements();
- // Handle vector lengths > 128 bits. Define a "section" as a set of
- // 128 bits. AVX defines UNPCK* to operate independently on 128-bit
- // sections.
- unsigned NumSections = VT.getSizeInBits() / 128;
- if (NumSections == 0 ) NumSections = 1; // Handle MMX
- unsigned NumSectionElts = NumElts / NumSections;
+ // Handle 128 and 256-bit vector lengths. AVX defines UNPCK* to operate
+ // independently on 128-bit lanes.
+ unsigned NumLanes = VT.getSizeInBits() / 128;
+ if (NumLanes == 0 ) NumLanes = 1; // Handle MMX
+ unsigned NumLaneElts = NumElts / NumLanes;
unsigned Start = 0;
- unsigned End = NumSectionElts / 2;
- for (unsigned s = 0; s < NumSections; ++s) {
+ unsigned End = NumLaneElts / 2;
+ for (unsigned s = 0; s < NumLanes; ++s) {
for (unsigned i = Start; i != End; ++i) {
ShuffleMask.push_back(i); // Reads from dest/src1
- ShuffleMask.push_back(i+NumSectionElts); // Reads from src/src2
+ ShuffleMask.push_back(i+NumLaneElts); // Reads from src/src2
}
// Process the next 128 bits.
- Start += NumSectionElts;
- End += NumSectionElts;
+ Start += NumLaneElts;
+ End += NumLaneElts;
}
}
+// DecodeVPERMILPSMask - Decodes VPERMILPS permutes for any 128-bit 32-bit
+// elements. For 256-bit vectors, it's considered as two 128 lanes, the
+// referenced elements can't cross lanes and the mask of the first lane must
+// be the same of the second.
+void DecodeVPERMILPSMask(unsigned NumElts, unsigned Imm,
+ SmallVectorImpl<unsigned> &ShuffleMask) {
+ unsigned NumLanes = (NumElts*32)/128;
+ unsigned LaneSize = NumElts/NumLanes;
+
+ for (unsigned l = 0; l != NumLanes; ++l) {
+ for (unsigned i = 0; i != LaneSize; ++i) {
+ unsigned Idx = (Imm >> (i*2)) & 0x3 ;
+ ShuffleMask.push_back(Idx+(l*LaneSize));
+ }
+ }
+}
+
+// DecodeVPERMILPDMask - Decodes VPERMILPD permutes for any 128-bit 64-bit
+// elements. For 256-bit vectors, it's considered as two 128 lanes, the
+// referenced elements can't cross lanes but the mask of the first lane can
+// be the different of the second (not like VPERMILPS).
+void DecodeVPERMILPDMask(unsigned NumElts, unsigned Imm,
+ SmallVectorImpl<unsigned> &ShuffleMask) {
+ unsigned NumLanes = (NumElts*64)/128;
+ unsigned LaneSize = NumElts/NumLanes;
+
+ for (unsigned l = 0; l < NumLanes; ++l) {
+ for (unsigned i = l*LaneSize; i < LaneSize*(l+1); ++i) {
+ unsigned Idx = (Imm >> i) & 0x1;
+ ShuffleMask.push_back(Idx+(l*LaneSize));
+ }
+ }
+}
+
+void DecodeVPERM2F128Mask(EVT VT, unsigned Imm,
+ SmallVectorImpl<unsigned> &ShuffleMask) {
+ unsigned HalfSize = VT.getVectorNumElements()/2;
+ unsigned FstHalfBegin = (Imm & 0x3) * HalfSize;
+ unsigned SndHalfBegin = ((Imm >> 4) & 0x3) * HalfSize;
+
+ for (int i = FstHalfBegin, e = FstHalfBegin+HalfSize; i != e; ++i)
+ ShuffleMask.push_back(i);
+ for (int i = SndHalfBegin, e = SndHalfBegin+HalfSize; i != e; ++i)
+ ShuffleMask.push_back(i);
+}
+
+void DecodeVPERM2F128Mask(unsigned Imm,
+ SmallVectorImpl<unsigned> &ShuffleMask) {
+ // VPERM2F128 is used by any 256-bit EVT, but X86InstComments only
+ // has information about the instruction and not the types. So for
+ // instruction comments purpose, assume the 256-bit vector is v4i64.
+ return DecodeVPERM2F128Mask(MVT::v4i64, Imm, ShuffleMask);
+}
+
} // llvm namespace
diff --git a/contrib/llvm/lib/Target/X86/Utils/X86ShuffleDecode.h b/contrib/llvm/lib/Target/X86/Utils/X86ShuffleDecode.h
index b18f670..58193e6 100644
--- a/contrib/llvm/lib/Target/X86/Utils/X86ShuffleDecode.h
+++ b/contrib/llvm/lib/Target/X86/Utils/X86ShuffleDecode.h
@@ -82,6 +82,26 @@ void DecodeUNPCKLPDMask(unsigned NElts,
void DecodeUNPCKLPMask(EVT VT,
SmallVectorImpl<unsigned> &ShuffleMask);
+
+// DecodeVPERMILPSMask - Decodes VPERMILPS permutes for any 128-bit 32-bit
+// elements. For 256-bit vectors, it's considered as two 128 lanes, the
+// referenced elements can't cross lanes and the mask of the first lane must
+// be the same of the second.
+void DecodeVPERMILPSMask(unsigned NElts, unsigned Imm,
+ SmallVectorImpl<unsigned> &ShuffleMask);
+
+// DecodeVPERMILPDMask - Decodes VPERMILPD permutes for any 128-bit 64-bit
+// elements. For 256-bit vectors, it's considered as two 128 lanes, the
+// referenced elements can't cross lanes but the mask of the first lane can
+// be the different of the second (not like VPERMILPS).
+void DecodeVPERMILPDMask(unsigned NElts, unsigned Imm,
+ SmallVectorImpl<unsigned> &ShuffleMask);
+
+void DecodeVPERM2F128Mask(unsigned Imm,
+ SmallVectorImpl<unsigned> &ShuffleMask);
+void DecodeVPERM2F128Mask(EVT VT, unsigned Imm,
+ SmallVectorImpl<unsigned> &ShuffleMask);
+
} // llvm namespace
#endif
diff --git a/contrib/llvm/lib/Target/X86/X86.h b/contrib/llvm/lib/Target/X86/X86.h
index ec52dfb..81e9422 100644
--- a/contrib/llvm/lib/Target/X86/X86.h
+++ b/contrib/llvm/lib/Target/X86/X86.h
@@ -15,6 +15,7 @@
#ifndef TARGET_X86_H
#define TARGET_X86_H
+#include "MCTargetDesc/X86BaseInfo.h"
#include "MCTargetDesc/X86MCTargetDesc.h"
#include "llvm/Support/DataTypes.h"
#include "llvm/Target/TargetMachine.h"
@@ -24,16 +25,8 @@ namespace llvm {
class FunctionPass;
class JITCodeEmitter;
class MachineCodeEmitter;
-class MCCodeEmitter;
-class MCContext;
-class MCInstrInfo;
-class MCObjectWriter;
-class MCSubtargetInfo;
class Target;
-class TargetAsmBackend;
class X86TargetMachine;
-class formatted_raw_ostream;
-class raw_ostream;
/// createX86ISelDag - This pass converts a legalized DAG into a
/// X86-specific DAG, ready for instruction scheduling.
@@ -51,22 +44,16 @@ FunctionPass* createGlobalBaseRegPass();
///
FunctionPass *createX86FloatingPointStackifierPass();
-/// createSSEDomainFixPass - This pass twiddles SSE opcodes to prevent domain
-/// crossings.
-FunctionPass *createSSEDomainFixPass();
+/// createX86IssueVZeroUpperPass - This pass inserts AVX vzeroupper instructions
+/// before each call to avoid transition penalty between functions encoded with
+/// AVX and SSE.
+FunctionPass *createX86IssueVZeroUpperPass();
/// createX86CodeEmitterPass - Return a pass that emits the collected X86 code
/// to the specified MCE object.
FunctionPass *createX86JITCodeEmitterPass(X86TargetMachine &TM,
JITCodeEmitter &JCE);
-MCCodeEmitter *createX86MCCodeEmitter(const MCInstrInfo &MCII,
- const MCSubtargetInfo &STI,
- MCContext &Ctx);
-
-TargetAsmBackend *createX86_32AsmBackend(const Target &, const std::string &);
-TargetAsmBackend *createX86_64AsmBackend(const Target &, const std::string &);
-
/// createX86EmitCodeToMemory - Returns a pass that converts a register
/// allocated function into raw machine code in a dynamically
/// allocated chunk of memory.
@@ -79,13 +66,6 @@ FunctionPass *createEmitX86CodeToMemory();
///
FunctionPass *createX86MaxStackAlignmentHeuristicPass();
-
-/// createX86MachObjectWriter - Construct an X86 Mach-O object writer.
-MCObjectWriter *createX86MachObjectWriter(raw_ostream &OS,
- bool Is64Bit,
- uint32_t CPUType,
- uint32_t CPUSubtype);
-
} // End llvm namespace
#endif
diff --git a/contrib/llvm/lib/Target/X86/X86.td b/contrib/llvm/lib/Target/X86/X86.td
index 4ccb43f..104b91f 100644
--- a/contrib/llvm/lib/Target/X86/X86.td
+++ b/contrib/llvm/lib/Target/X86/X86.td
@@ -7,8 +7,8 @@
//
//===----------------------------------------------------------------------===//
//
-// This is a target description file for the Intel i386 architecture, referred to
-// here as the "X86" architecture.
+// This is a target description file for the Intel i386 architecture, referred
+// to here as the "X86" architecture.
//
//===----------------------------------------------------------------------===//
@@ -23,6 +23,9 @@ include "llvm/Target/Target.td"
def Mode64Bit : SubtargetFeature<"64bit-mode", "In64BitMode", "true",
"64-bit mode (x86_64)">;
+def ModeNaCl : SubtargetFeature<"nacl-mode", "InNaClMode", "true",
+ "Native Client mode">;
+
//===----------------------------------------------------------------------===//
// X86 Subtarget features.
//===----------------------------------------------------------------------===//
@@ -68,6 +71,9 @@ def Feature3DNowA : SubtargetFeature<"3dnowa", "X863DNowLevel", "ThreeDNowA",
def Feature64Bit : SubtargetFeature<"64bit", "HasX86_64", "true",
"Support 64-bit instructions",
[FeatureCMOV]>;
+def FeatureCMPXCHG16B : SubtargetFeature<"cmpxchg16b", "HasCmpxchg16b", "true",
+ "64-bit with cmpxchg16b",
+ [Feature64Bit]>;
def FeatureSlowBTMem : SubtargetFeature<"slow-bt-mem", "IsBTMemSlow", "true",
"Bit testing of memory is slow">;
def FeatureFastUAMem : SubtargetFeature<"fast-unaligned-mem",
@@ -90,6 +96,16 @@ def FeatureVectorUAMem : SubtargetFeature<"vector-unaligned-mem",
"Allow unaligned memory operands on vector/SIMD instructions">;
def FeatureAES : SubtargetFeature<"aes", "HasAES", "true",
"Enable AES instructions">;
+def FeatureMOVBE : SubtargetFeature<"movbe", "HasMOVBE", "true",
+ "Support MOVBE instruction">;
+def FeatureRDRAND : SubtargetFeature<"rdrand", "HasRDRAND", "true",
+ "Support RDRAND instruction">;
+def FeatureF16C : SubtargetFeature<"f16c", "HasF16C", "true",
+ "Support 16-bit floating point conversion instructions">;
+def FeatureLZCNT : SubtargetFeature<"lzcnt", "HasLZCNT", "true",
+ "Support LZCNT instruction">;
+def FeatureBMI : SubtargetFeature<"bmi", "HasBMI", "true",
+ "Support BMI instructions">;
//===----------------------------------------------------------------------===//
// X86 processors supported.
@@ -112,27 +128,43 @@ def : Proc<"pentium3m", [FeatureSSE1, FeatureSlowBTMem]>;
def : Proc<"pentium-m", [FeatureSSE2, FeatureSlowBTMem]>;
def : Proc<"pentium4", [FeatureSSE2]>;
def : Proc<"pentium4m", [FeatureSSE2, FeatureSlowBTMem]>;
-def : Proc<"x86-64", [FeatureSSE2, Feature64Bit, FeatureSlowBTMem]>;
+def : Proc<"x86-64", [FeatureSSE2, Feature64Bit, FeatureSlowBTMem]>;
def : Proc<"yonah", [FeatureSSE3, FeatureSlowBTMem]>;
def : Proc<"prescott", [FeatureSSE3, FeatureSlowBTMem]>;
-def : Proc<"nocona", [FeatureSSE3, Feature64Bit, FeatureSlowBTMem]>;
-def : Proc<"core2", [FeatureSSSE3, Feature64Bit, FeatureSlowBTMem]>;
-def : Proc<"penryn", [FeatureSSE41, Feature64Bit, FeatureSlowBTMem]>;
-def : Proc<"atom", [FeatureSSE3, Feature64Bit, FeatureSlowBTMem]>;
+def : Proc<"nocona", [FeatureSSE3, FeatureCMPXCHG16B,
+ FeatureSlowBTMem]>;
+def : Proc<"core2", [FeatureSSSE3, FeatureCMPXCHG16B,
+ FeatureSlowBTMem]>;
+def : Proc<"penryn", [FeatureSSE41, FeatureCMPXCHG16B,
+ FeatureSlowBTMem]>;
+def : Proc<"atom", [FeatureSSE3, FeatureCMPXCHG16B, FeatureMOVBE,
+ FeatureSlowBTMem]>;
// "Arrandale" along with corei3 and corei5
-def : Proc<"corei7", [FeatureSSE42, Feature64Bit, FeatureSlowBTMem,
- FeatureFastUAMem, FeatureAES]>;
-def : Proc<"nehalem", [FeatureSSE42, Feature64Bit, FeatureSlowBTMem,
- FeatureFastUAMem]>;
+def : Proc<"corei7", [FeatureSSE42, FeatureCMPXCHG16B,
+ FeatureSlowBTMem, FeatureFastUAMem, FeatureAES]>;
+def : Proc<"nehalem", [FeatureSSE42, FeatureCMPXCHG16B,
+ FeatureSlowBTMem, FeatureFastUAMem]>;
// Westmere is a similar machine to nehalem with some additional features.
// Westmere is the corei3/i5/i7 path from nehalem to sandybridge
-def : Proc<"westmere", [FeatureSSE42, Feature64Bit, FeatureSlowBTMem,
- FeatureFastUAMem, FeatureAES, FeatureCLMUL]>;
+def : Proc<"westmere", [FeatureSSE42, FeatureCMPXCHG16B,
+ FeatureSlowBTMem, FeatureFastUAMem, FeatureAES,
+ FeatureCLMUL]>;
+// Sandy Bridge
// SSE is not listed here since llvm treats AVX as a reimplementation of SSE,
// rather than a superset.
// FIXME: Disabling AVX for now since it's not ready.
-def : Proc<"corei7-avx", [FeatureSSE42, Feature64Bit,
+def : Proc<"corei7-avx", [FeatureSSE42, FeatureCMPXCHG16B,
FeatureAES, FeatureCLMUL]>;
+// Ivy Bridge
+def : Proc<"core-avx-i", [FeatureSSE42, FeatureCMPXCHG16B,
+ FeatureAES, FeatureCLMUL,
+ FeatureRDRAND, FeatureF16C]>;
+
+// Haswell
+def : Proc<"core-avx2", [FeatureSSE42, FeatureCMPXCHG16B, FeatureAES,
+ FeatureCLMUL, FeatureRDRAND, FeatureF16C,
+ FeatureFMA3, FeatureMOVBE, FeatureLZCNT,
+ FeatureBMI]>;
def : Proc<"k6", [FeatureMMX]>;
def : Proc<"k6-2", [Feature3DNow]>;
@@ -150,19 +182,21 @@ def : Proc<"athlon64", [FeatureSSE2, Feature3DNowA, Feature64Bit,
FeatureSlowBTMem]>;
def : Proc<"athlon-fx", [FeatureSSE2, Feature3DNowA, Feature64Bit,
FeatureSlowBTMem]>;
-def : Proc<"k8-sse3", [FeatureSSE3, Feature3DNowA, Feature64Bit,
+def : Proc<"k8-sse3", [FeatureSSE3, Feature3DNowA, FeatureCMPXCHG16B,
FeatureSlowBTMem]>;
-def : Proc<"opteron-sse3", [FeatureSSE3, Feature3DNowA, Feature64Bit,
+def : Proc<"opteron-sse3", [FeatureSSE3, Feature3DNowA, FeatureCMPXCHG16B,
FeatureSlowBTMem]>;
-def : Proc<"athlon64-sse3", [FeatureSSE3, Feature3DNowA, Feature64Bit,
+def : Proc<"athlon64-sse3", [FeatureSSE3, Feature3DNowA, FeatureCMPXCHG16B,
FeatureSlowBTMem]>;
def : Proc<"amdfam10", [FeatureSSE3, FeatureSSE4A,
- Feature3DNowA, Feature64Bit, FeatureSlowBTMem]>;
+ Feature3DNowA, FeatureCMPXCHG16B,
+ FeatureSlowBTMem]>;
def : Proc<"barcelona", [FeatureSSE3, FeatureSSE4A,
- Feature3DNowA, Feature64Bit, FeatureSlowBTMem]>;
-def : Proc<"istanbul", [Feature3DNowA, Feature64Bit, FeatureSSE4A,
- Feature3DNowA]>;
-def : Proc<"shanghai", [Feature3DNowA, Feature64Bit, FeatureSSE4A,
+ Feature3DNowA, FeatureCMPXCHG16B,
+ FeatureSlowBTMem]>;
+def : Proc<"istanbul", [Feature3DNowA, FeatureCMPXCHG16B,
+ FeatureSSE4A, Feature3DNowA]>;
+def : Proc<"shanghai", [Feature3DNowA, FeatureCMPXCHG16B, FeatureSSE4A,
Feature3DNowA]>;
def : Proc<"winchip-c6", [FeatureMMX]>;
diff --git a/contrib/llvm/lib/Target/X86/X86AsmPrinter.cpp b/contrib/llvm/lib/Target/X86/X86AsmPrinter.cpp
index 99b4479..4c3ff02 100644
--- a/contrib/llvm/lib/Target/X86/X86AsmPrinter.cpp
+++ b/contrib/llvm/lib/Target/X86/X86AsmPrinter.cpp
@@ -35,12 +35,12 @@
#include "llvm/CodeGen/MachineJumpTableInfo.h"
#include "llvm/CodeGen/MachineModuleInfoImpls.h"
#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
+#include "llvm/Target/Mangler.h"
+#include "llvm/Target/TargetOptions.h"
#include "llvm/Support/COFF.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
-#include "llvm/Target/Mangler.h"
-#include "llvm/Target/TargetOptions.h"
-#include "llvm/Target/TargetRegistry.h"
+#include "llvm/Support/TargetRegistry.h"
#include "llvm/ADT/SmallString.h"
using namespace llvm;
@@ -504,8 +504,8 @@ void X86AsmPrinter::EmitEndOfAsmFile(Module &M) {
// .indirect_symbol _foo
OutStreamer.EmitSymbolAttribute(Stubs[i].second.getPointer(),
MCSA_IndirectSymbol);
- // hlt; hlt; hlt; hlt; hlt hlt = 0xf4 = -12.
- const char HltInsts[] = { -12, -12, -12, -12, -12 };
+ // hlt; hlt; hlt; hlt; hlt hlt = 0xf4.
+ const char HltInsts[] = "\xf4\xf4\xf4\xf4\xf4";
OutStreamer.EmitBytes(StringRef(HltInsts, 5), 0/*addrspace*/);
}
@@ -708,21 +708,8 @@ void X86AsmPrinter::PrintDebugValueComment(const MachineInstr *MI,
// Target Registry Stuff
//===----------------------------------------------------------------------===//
-static MCInstPrinter *createX86MCInstPrinter(const Target &T,
- unsigned SyntaxVariant,
- const MCAsmInfo &MAI) {
- if (SyntaxVariant == 0)
- return new X86ATTInstPrinter(MAI);
- if (SyntaxVariant == 1)
- return new X86IntelInstPrinter(MAI);
- return 0;
-}
-
// Force static initialization.
extern "C" void LLVMInitializeX86AsmPrinter() {
RegisterAsmPrinter<X86AsmPrinter> X(TheX86_32Target);
RegisterAsmPrinter<X86AsmPrinter> Y(TheX86_64Target);
-
- TargetRegistry::RegisterMCInstPrinter(TheX86_32Target,createX86MCInstPrinter);
- TargetRegistry::RegisterMCInstPrinter(TheX86_64Target,createX86MCInstPrinter);
}
diff --git a/contrib/llvm/lib/Target/X86/X86CodeEmitter.cpp b/contrib/llvm/lib/Target/X86/X86CodeEmitter.cpp
index 4b11db7..aeff03a 100644
--- a/contrib/llvm/lib/Target/X86/X86CodeEmitter.cpp
+++ b/contrib/llvm/lib/Target/X86/X86CodeEmitter.cpp
@@ -98,8 +98,6 @@ namespace {
void emitMemModRMByte(const MachineInstr &MI,
unsigned Op, unsigned RegOpcodeField,
intptr_t PCAdj = 0);
-
- unsigned getX86RegNum(unsigned RegNo) const;
};
template<class CodeEmitter>
@@ -169,7 +167,7 @@ static unsigned determineREX(const MachineInstr &MI) {
const MachineOperand& MO = MI.getOperand(i);
if (MO.isReg()) {
unsigned Reg = MO.getReg();
- if (X86InstrInfo::isX86_64NonExtLowByteReg(Reg))
+ if (X86II::isX86_64NonExtLowByteReg(Reg))
REX |= 0x40;
}
}
@@ -346,11 +344,6 @@ void Emitter<CodeEmitter>::emitJumpTableAddress(unsigned JTI, unsigned Reloc,
MCE.emitWordLE(0);
}
-template<class CodeEmitter>
-unsigned Emitter<CodeEmitter>::getX86RegNum(unsigned RegNo) const {
- return X86RegisterInfo::getX86RegNum(RegNo);
-}
-
inline static unsigned char ModRMByte(unsigned Mod, unsigned RegOpcode,
unsigned RM) {
assert(Mod < 4 && RegOpcode < 8 && RM < 8 && "ModRM Fields out of range!");
@@ -360,7 +353,7 @@ inline static unsigned char ModRMByte(unsigned Mod, unsigned RegOpcode,
template<class CodeEmitter>
void Emitter<CodeEmitter>::emitRegModRMByte(unsigned ModRMReg,
unsigned RegOpcodeFld){
- MCE.emitByte(ModRMByte(3, RegOpcodeFld, getX86RegNum(ModRMReg)));
+ MCE.emitByte(ModRMByte(3, RegOpcodeFld, X86_MC::getX86RegNum(ModRMReg)));
}
template<class CodeEmitter>
@@ -498,7 +491,7 @@ void Emitter<CodeEmitter>::emitMemModRMByte(const MachineInstr &MI,
// 2-7) and absolute references.
unsigned BaseRegNo = -1U;
if (BaseReg != 0 && BaseReg != X86::RIP)
- BaseRegNo = getX86RegNum(BaseReg);
+ BaseRegNo = X86_MC::getX86RegNum(BaseReg);
if (// The SIB byte must be used if there is an index register.
IndexReg.getReg() == 0 &&
@@ -566,7 +559,7 @@ void Emitter<CodeEmitter>::emitMemModRMByte(const MachineInstr &MI,
}
// Calculate what the SS field value should be...
- static const unsigned SSTable[] = { ~0, 0, 1, ~0, 2, ~0, ~0, ~0, 3 };
+ static const unsigned SSTable[] = { ~0U, 0, 1, ~0U, 2, ~0U, ~0U, ~0U, 3 };
unsigned SS = SSTable[Scale.getImm()];
if (BaseReg == 0) {
@@ -574,15 +567,15 @@ void Emitter<CodeEmitter>::emitMemModRMByte(const MachineInstr &MI,
// Manual 2A, table 2-7. The displacement has already been output.
unsigned IndexRegNo;
if (IndexReg.getReg())
- IndexRegNo = getX86RegNum(IndexReg.getReg());
+ IndexRegNo = X86_MC::getX86RegNum(IndexReg.getReg());
else // Examples: [ESP+1*<noreg>+4] or [scaled idx]+disp32 (MOD=0,BASE=5)
IndexRegNo = 4;
emitSIBByte(SS, IndexRegNo, 5);
} else {
- unsigned BaseRegNo = getX86RegNum(BaseReg);
+ unsigned BaseRegNo = X86_MC::getX86RegNum(BaseReg);
unsigned IndexRegNo;
if (IndexReg.getReg())
- IndexRegNo = getX86RegNum(IndexReg.getReg());
+ IndexRegNo = X86_MC::getX86RegNum(IndexReg.getReg());
else
IndexRegNo = 4; // For example [ESP+1*<noreg>+4]
emitSIBByte(SS, IndexRegNo, BaseRegNo);
@@ -809,7 +802,8 @@ void Emitter<CodeEmitter>::emitInstruction(MachineInstr &MI,
}
case X86II::AddRegFrm: {
- MCE.emitByte(BaseOpcode + getX86RegNum(MI.getOperand(CurOp++).getReg()));
+ MCE.emitByte(BaseOpcode +
+ X86_MC::getX86RegNum(MI.getOperand(CurOp++).getReg()));
if (CurOp == NumOps)
break;
@@ -844,7 +838,7 @@ void Emitter<CodeEmitter>::emitInstruction(MachineInstr &MI,
case X86II::MRMDestReg: {
MCE.emitByte(BaseOpcode);
emitRegModRMByte(MI.getOperand(CurOp).getReg(),
- getX86RegNum(MI.getOperand(CurOp+1).getReg()));
+ X86_MC::getX86RegNum(MI.getOperand(CurOp+1).getReg()));
CurOp += 2;
if (CurOp != NumOps)
emitConstant(MI.getOperand(CurOp++).getImm(),
@@ -854,7 +848,7 @@ void Emitter<CodeEmitter>::emitInstruction(MachineInstr &MI,
case X86II::MRMDestMem: {
MCE.emitByte(BaseOpcode);
emitMemModRMByte(MI, CurOp,
- getX86RegNum(MI.getOperand(CurOp + X86::AddrNumOperands)
+ X86_MC::getX86RegNum(MI.getOperand(CurOp + X86::AddrNumOperands)
.getReg()));
CurOp += X86::AddrNumOperands + 1;
if (CurOp != NumOps)
@@ -866,7 +860,7 @@ void Emitter<CodeEmitter>::emitInstruction(MachineInstr &MI,
case X86II::MRMSrcReg:
MCE.emitByte(BaseOpcode);
emitRegModRMByte(MI.getOperand(CurOp+1).getReg(),
- getX86RegNum(MI.getOperand(CurOp).getReg()));
+ X86_MC::getX86RegNum(MI.getOperand(CurOp).getReg()));
CurOp += 2;
if (CurOp != NumOps)
emitConstant(MI.getOperand(CurOp++).getImm(),
@@ -880,8 +874,8 @@ void Emitter<CodeEmitter>::emitInstruction(MachineInstr &MI,
X86II::getSizeOfImm(Desc->TSFlags) : 0;
MCE.emitByte(BaseOpcode);
- emitMemModRMByte(MI, CurOp+1, getX86RegNum(MI.getOperand(CurOp).getReg()),
- PCAdj);
+ emitMemModRMByte(MI, CurOp+1,
+ X86_MC::getX86RegNum(MI.getOperand(CurOp).getReg()),PCAdj);
CurOp += AddrOperands + 1;
if (CurOp != NumOps)
emitConstant(MI.getOperand(CurOp++).getImm(),
@@ -968,7 +962,7 @@ void Emitter<CodeEmitter>::emitInstruction(MachineInstr &MI,
MCE.emitByte(BaseOpcode);
// Duplicate register, used by things like MOV8r0 (aka xor reg,reg).
emitRegModRMByte(MI.getOperand(CurOp).getReg(),
- getX86RegNum(MI.getOperand(CurOp).getReg()));
+ X86_MC::getX86RegNum(MI.getOperand(CurOp).getReg()));
++CurOp;
break;
diff --git a/contrib/llvm/lib/Target/X86/X86ELFWriterInfo.cpp b/contrib/llvm/lib/Target/X86/X86ELFWriterInfo.cpp
index f1d7ede..4a72d15 100644
--- a/contrib/llvm/lib/Target/X86/X86ELFWriterInfo.cpp
+++ b/contrib/llvm/lib/Target/X86/X86ELFWriterInfo.cpp
@@ -147,7 +147,7 @@ long int X86ELFWriterInfo::computeRelocation(unsigned SymOffset,
if (RelTy == ELF::R_X86_64_PC32 || RelTy == ELF::R_386_PC32)
return SymOffset - (RelOffset + 4);
else
- assert("computeRelocation unknown for this relocation type");
+ assert(0 && "computeRelocation unknown for this relocation type");
return 0;
}
diff --git a/contrib/llvm/lib/Target/X86/X86FastISel.cpp b/contrib/llvm/lib/Target/X86/X86FastISel.cpp
index 21e163a..f912b28 100644
--- a/contrib/llvm/lib/Target/X86/X86FastISel.cpp
+++ b/contrib/llvm/lib/Target/X86/X86FastISel.cpp
@@ -22,6 +22,7 @@
#include "llvm/CallingConv.h"
#include "llvm/DerivedTypes.h"
#include "llvm/GlobalVariable.h"
+#include "llvm/GlobalAlias.h"
#include "llvm/Instructions.h"
#include "llvm/IntrinsicInst.h"
#include "llvm/Operator.h"
@@ -59,8 +60,8 @@ public:
explicit X86FastISel(FunctionLoweringInfo &funcInfo) : FastISel(funcInfo) {
Subtarget = &TM.getSubtarget<X86Subtarget>();
StackPtr = Subtarget->is64Bit() ? X86::RSP : X86::ESP;
- X86ScalarSSEf64 = Subtarget->hasSSE2();
- X86ScalarSSEf32 = Subtarget->hasSSE1();
+ X86ScalarSSEf64 = Subtarget->hasSSE2() || Subtarget->hasAVX();
+ X86ScalarSSEf32 = Subtarget->hasSSE1() || Subtarget->hasAVX();
}
virtual bool TargetSelectInstruction(const Instruction *I);
@@ -134,7 +135,7 @@ private:
(VT == MVT::f32 && X86ScalarSSEf32); // f32 is when SSE1
}
- bool isTypeLegal(const Type *Ty, MVT &VT, bool AllowI1 = false);
+ bool isTypeLegal(Type *Ty, MVT &VT, bool AllowI1 = false);
bool IsMemcpySmall(uint64_t Len);
@@ -144,7 +145,7 @@ private:
} // end anonymous namespace.
-bool X86FastISel::isTypeLegal(const Type *Ty, MVT &VT, bool AllowI1) {
+bool X86FastISel::isTypeLegal(Type *Ty, MVT &VT, bool AllowI1) {
EVT evt = TLI.getValueType(Ty, /*HandleUnknown=*/true);
if (evt == MVT::Other || !evt.isSimple())
// Unhandled type. Halt "fast" selection and bail.
@@ -198,8 +199,8 @@ bool X86FastISel::X86FastEmitLoad(EVT VT, const X86AddressMode &AM,
RC = X86::GR64RegisterClass;
break;
case MVT::f32:
- if (Subtarget->hasSSE1()) {
- Opc = X86::MOVSSrm;
+ if (X86ScalarSSEf32) {
+ Opc = Subtarget->hasAVX() ? X86::VMOVSSrm : X86::MOVSSrm;
RC = X86::FR32RegisterClass;
} else {
Opc = X86::LD_Fp32m;
@@ -207,8 +208,8 @@ bool X86FastISel::X86FastEmitLoad(EVT VT, const X86AddressMode &AM,
}
break;
case MVT::f64:
- if (Subtarget->hasSSE2()) {
- Opc = X86::MOVSDrm;
+ if (X86ScalarSSEf64) {
+ Opc = Subtarget->hasAVX() ? X86::VMOVSDrm : X86::MOVSDrm;
RC = X86::FR64RegisterClass;
} else {
Opc = X86::LD_Fp64m;
@@ -250,10 +251,12 @@ X86FastISel::X86FastEmitStore(EVT VT, unsigned Val, const X86AddressMode &AM) {
case MVT::i32: Opc = X86::MOV32mr; break;
case MVT::i64: Opc = X86::MOV64mr; break; // Must be in x86-64 mode.
case MVT::f32:
- Opc = Subtarget->hasSSE1() ? X86::MOVSSmr : X86::ST_Fp32m;
+ Opc = X86ScalarSSEf32 ?
+ (Subtarget->hasAVX() ? X86::VMOVSSmr : X86::MOVSSmr) : X86::ST_Fp32m;
break;
case MVT::f64:
- Opc = Subtarget->hasSSE2() ? X86::MOVSDmr : X86::ST_Fp64m;
+ Opc = X86ScalarSSEf64 ?
+ (Subtarget->hasAVX() ? X86::VMOVSDmr : X86::MOVSDmr) : X86::ST_Fp64m;
break;
}
@@ -336,7 +339,7 @@ bool X86FastISel::X86SelectAddress(const Value *V, X86AddressMode &AM) {
U = C;
}
- if (const PointerType *Ty = dyn_cast<PointerType>(V->getType()))
+ if (PointerType *Ty = dyn_cast<PointerType>(V->getType()))
if (Ty->getAddressSpace() > 255)
// Fast instruction selection doesn't support the special
// address spaces.
@@ -399,7 +402,7 @@ bool X86FastISel::X86SelectAddress(const Value *V, X86AddressMode &AM) {
for (User::const_op_iterator i = U->op_begin() + 1, e = U->op_end();
i != e; ++i, ++GTI) {
const Value *Op = *i;
- if (const StructType *STy = dyn_cast<StructType>(*GTI)) {
+ if (StructType *STy = dyn_cast<StructType>(*GTI)) {
const StructLayout *SL = TD.getStructLayout(STy);
Disp += SL->getElementOffset(cast<ConstantInt>(Op)->getZExtValue());
continue;
@@ -465,14 +468,23 @@ bool X86FastISel::X86SelectAddress(const Value *V, X86AddressMode &AM) {
// Handle constant address.
if (const GlobalValue *GV = dyn_cast<GlobalValue>(V)) {
- // Can't handle alternate code models or TLS yet.
+ // Can't handle alternate code models yet.
if (TM.getCodeModel() != CodeModel::Small)
return false;
+ // Can't handle TLS yet.
if (const GlobalVariable *GVar = dyn_cast<GlobalVariable>(GV))
if (GVar->isThreadLocal())
return false;
+ // Can't handle TLS yet, part 2 (this is slightly crazy, but this is how
+ // it works...).
+ if (const GlobalAlias *GA = dyn_cast<GlobalAlias>(GV))
+ if (const GlobalVariable *GVar =
+ dyn_cast_or_null<GlobalVariable>(GA->resolveAliasedGlobal(false)))
+ if (GVar->isThreadLocal())
+ return false;
+
// RIP-relative addresses can't have additional register operands, so if
// we've already folded stuff into the addressing mode, just force the
// global value into its own register, which we can use as the basereg.
@@ -658,6 +670,10 @@ bool X86FastISel::X86SelectCallAddress(const Value *V, X86AddressMode &AM) {
/// X86SelectStore - Select and emit code to implement store instructions.
bool X86FastISel::X86SelectStore(const Instruction *I) {
+ // Atomic stores need special handling.
+ if (cast<StoreInst>(I)->isAtomic())
+ return false;
+
MVT VT;
if (!isTypeLegal(I->getOperand(0)->getType(), VT, /*AllowI1=*/true))
return false;
@@ -780,6 +796,10 @@ bool X86FastISel::X86SelectRet(const Instruction *I) {
/// X86SelectLoad - Select and emit code to implement load instructions.
///
bool X86FastISel::X86SelectLoad(const Instruction *I) {
+ // Atomic loads need special handling.
+ if (cast<LoadInst>(I)->isAtomic())
+ return false;
+
MVT VT;
if (!isTypeLegal(I->getType(), VT, /*AllowI1=*/true))
return false;
@@ -797,14 +817,20 @@ bool X86FastISel::X86SelectLoad(const Instruction *I) {
}
static unsigned X86ChooseCmpOpcode(EVT VT, const X86Subtarget *Subtarget) {
+ bool HasAVX = Subtarget->hasAVX();
+ bool X86ScalarSSEf32 = HasAVX || Subtarget->hasSSE1();
+ bool X86ScalarSSEf64 = HasAVX || Subtarget->hasSSE2();
+
switch (VT.getSimpleVT().SimpleTy) {
default: return 0;
case MVT::i8: return X86::CMP8rr;
case MVT::i16: return X86::CMP16rr;
case MVT::i32: return X86::CMP32rr;
case MVT::i64: return X86::CMP64rr;
- case MVT::f32: return Subtarget->hasSSE1() ? X86::UCOMISSrr : 0;
- case MVT::f64: return Subtarget->hasSSE2() ? X86::UCOMISDrr : 0;
+ case MVT::f32:
+ return X86ScalarSSEf32 ? (HasAVX ? X86::VUCOMISSrr : X86::UCOMISSrr) : 0;
+ case MVT::f64:
+ return X86ScalarSSEf64 ? (HasAVX ? X86::VUCOMISDrr : X86::UCOMISDrr) : 0;
}
}
@@ -1207,7 +1233,7 @@ bool X86FastISel::X86SelectSelect(const Instruction *I) {
bool X86FastISel::X86SelectFPExt(const Instruction *I) {
// fpext from float to double.
- if (Subtarget->hasSSE2() &&
+ if (X86ScalarSSEf64 &&
I->getType()->isDoubleTy()) {
const Value *V = I->getOperand(0);
if (V->getType()->isFloatTy()) {
@@ -1226,7 +1252,7 @@ bool X86FastISel::X86SelectFPExt(const Instruction *I) {
}
bool X86FastISel::X86SelectFPTrunc(const Instruction *I) {
- if (Subtarget->hasSSE2()) {
+ if (X86ScalarSSEf64) {
if (I->getType()->isFloatTy()) {
const Value *V = I->getOperand(0);
if (V->getType()->isDoubleTy()) {
@@ -1365,6 +1391,9 @@ bool X86FastISel::X86VisitIntrinsicCall(const IntrinsicInst &I) {
case Intrinsic::memset: {
const MemSetInst &MSI = cast<MemSetInst>(I);
+ if (MSI.isVolatile())
+ return false;
+
unsigned SizeWidth = Subtarget->is64Bit() ? 64 : 32;
if (!MSI.getLength()->getType()->isIntegerTy(SizeWidth))
return false;
@@ -1411,7 +1440,7 @@ bool X86FastISel::X86VisitIntrinsicCall(const IntrinsicInst &I) {
// Replace "add with overflow" intrinsics with an "add" instruction followed
// by a seto/setc instruction.
const Function *Callee = I.getCalledFunction();
- const Type *RetTy =
+ Type *RetTy =
cast<StructType>(Callee->getReturnType())->getTypeAtIndex(unsigned(0));
MVT VT;
@@ -1484,8 +1513,8 @@ bool X86FastISel::DoSelectCall(const Instruction *I, const char *MemIntName) {
if (CC == CallingConv::Fast && GuaranteedTailCallOpt)
return false;
- const PointerType *PT = cast<PointerType>(CS.getCalledValue()->getType());
- const FunctionType *FTy = cast<FunctionType>(PT->getElementType());
+ PointerType *PT = cast<PointerType>(CS.getCalledValue()->getType());
+ FunctionType *FTy = cast<FunctionType>(PT->getElementType());
bool isVarArg = FTy->isVarArg();
// Don't know how to handle Win64 varargs yet. Nothing special needed for
@@ -1547,8 +1576,8 @@ bool X86FastISel::DoSelectCall(const Instruction *I, const char *MemIntName) {
Flags.setZExt();
if (CS.paramHasAttr(AttrInd, Attribute::ByVal)) {
- const PointerType *Ty = cast<PointerType>(ArgVal->getType());
- const Type *ElementTy = Ty->getElementType();
+ PointerType *Ty = cast<PointerType>(ArgVal->getType());
+ Type *ElementTy = Ty->getElementType();
unsigned FrameSize = TD.getTypeAllocSize(ElementTy);
unsigned FrameAlign = CS.getParamAlignment(AttrInd);
if (!FrameAlign)
@@ -1600,7 +1629,7 @@ bool X86FastISel::DoSelectCall(const Instruction *I, const char *MemIntName) {
if (ArgReg == 0) return false;
- const Type *ArgTy = ArgVal->getType();
+ Type *ArgTy = ArgVal->getType();
MVT ArgVT;
if (!isTypeLegal(ArgTy, ArgVT))
return false;
@@ -1709,7 +1738,7 @@ bool X86FastISel::DoSelectCall(const Instruction *I, const char *MemIntName) {
assert(Res && "memcpy length already checked!"); (void)Res;
} else if (isa<ConstantInt>(ArgVal) || isa<ConstantPointerNull>(ArgVal)) {
// If this is a really simple value, emit this with the Value* version
- //of X86FastEmitStore. If it isn't simple, we don't want to do this,
+ // of X86FastEmitStore. If it isn't simple, we don't want to do this,
// as it can cause us to reevaluate the argument.
X86FastEmitStore(ArgVT, ArgVal, AM);
} else {
@@ -1965,8 +1994,8 @@ unsigned X86FastISel::TargetMaterializeConstant(const Constant *C) {
RC = X86::GR64RegisterClass;
break;
case MVT::f32:
- if (Subtarget->hasSSE1()) {
- Opc = X86::MOVSSrm;
+ if (X86ScalarSSEf32) {
+ Opc = Subtarget->hasAVX() ? X86::VMOVSSrm : X86::MOVSSrm;
RC = X86::FR32RegisterClass;
} else {
Opc = X86::LD_Fp32m;
@@ -1974,8 +2003,8 @@ unsigned X86FastISel::TargetMaterializeConstant(const Constant *C) {
}
break;
case MVT::f64:
- if (Subtarget->hasSSE2()) {
- Opc = X86::MOVSDrm;
+ if (X86ScalarSSEf64) {
+ Opc = Subtarget->hasAVX() ? X86::VMOVSDrm : X86::MOVSDrm;
RC = X86::FR64RegisterClass;
} else {
Opc = X86::LD_Fp64m;
@@ -2070,8 +2099,8 @@ unsigned X86FastISel::TargetMaterializeFloatZero(const ConstantFP *CF) {
switch (VT.SimpleTy) {
default: return false;
case MVT::f32:
- if (Subtarget->hasSSE1()) {
- Opc = X86::FsFLD0SS;
+ if (X86ScalarSSEf32) {
+ Opc = Subtarget->hasAVX() ? X86::VFsFLD0SS : X86::FsFLD0SS;
RC = X86::FR32RegisterClass;
} else {
Opc = X86::LD_Fp032;
@@ -2079,8 +2108,8 @@ unsigned X86FastISel::TargetMaterializeFloatZero(const ConstantFP *CF) {
}
break;
case MVT::f64:
- if (Subtarget->hasSSE2()) {
- Opc = X86::FsFLD0SD;
+ if (X86ScalarSSEf64) {
+ Opc = Subtarget->hasAVX() ? X86::VFsFLD0SD : X86::FsFLD0SD;
RC = X86::FR64RegisterClass;
} else {
Opc = X86::LD_Fp064;
diff --git a/contrib/llvm/lib/Target/X86/X86FloatingPoint.cpp b/contrib/llvm/lib/Target/X86/X86FloatingPoint.cpp
index 6eed6abd..e3461c8 100644
--- a/contrib/llvm/lib/Target/X86/X86FloatingPoint.cpp
+++ b/contrib/llvm/lib/Target/X86/X86FloatingPoint.cpp
@@ -260,6 +260,21 @@ namespace {
BuildMI(*MBB, I, dl, TII->get(X86::LD_Frr)).addReg(STReg);
}
+ /// duplicatePendingSTBeforeKill - The instruction at I is about to kill
+ /// RegNo. If any PendingST registers still need the RegNo value, duplicate
+ /// them to new scratch registers.
+ void duplicatePendingSTBeforeKill(unsigned RegNo, MachineInstr *I) {
+ for (unsigned i = 0; i != NumPendingSTs; ++i) {
+ if (PendingST[i] != RegNo)
+ continue;
+ unsigned SR = getScratchReg();
+ DEBUG(dbgs() << "Duplicating pending ST" << i
+ << " in FP" << RegNo << " to FP" << SR << '\n');
+ duplicateToTop(RegNo, SR, I);
+ PendingST[i] = SR;
+ }
+ }
+
/// popStackAfter - Pop the current value off of the top of the FP stack
/// after the specified instruction.
void popStackAfter(MachineBasicBlock::iterator &I);
@@ -406,6 +421,10 @@ bool FPS::processBasicBlock(MachineFunction &MF, MachineBasicBlock &BB) {
if (MI->isCopy() && isFPCopy(MI))
FPInstClass = X86II::SpecialFP;
+ if (MI->isImplicitDef() &&
+ X86::RFP80RegClass.contains(MI->getOperand(0).getReg()))
+ FPInstClass = X86II::SpecialFP;
+
if (FPInstClass == X86II::NotFP)
continue; // Efficiently ignore non-fp insts!
@@ -461,6 +480,7 @@ bool FPS::processBasicBlock(MachineFunction &MF, MachineBasicBlock &BB) {
}
dumpStack();
);
+ (void)PrevMI;
Changed = true;
}
@@ -969,6 +989,9 @@ void FPS::handleOneArgFP(MachineBasicBlock::iterator &I) {
unsigned Reg = getFPReg(MI->getOperand(NumOps-1));
bool KillsSrc = MI->killsRegister(X86::FP0+Reg);
+ if (KillsSrc)
+ duplicatePendingSTBeforeKill(Reg, I);
+
// FISTP64m is strange because there isn't a non-popping versions.
// If we have one _and_ we don't want to pop the operand, duplicate the value
// on the stack instead of moving it. This ensure that popping the value is
@@ -1032,6 +1055,7 @@ void FPS::handleOneArgFPRW(MachineBasicBlock::iterator &I) {
bool KillsSrc = MI->killsRegister(X86::FP0+Reg);
if (KillsSrc) {
+ duplicatePendingSTBeforeKill(Reg, I);
// If this is the last use of the source register, just make sure it's on
// the top of the stack.
moveToTop(Reg, I);
@@ -1318,6 +1342,7 @@ void FPS::handleSpecialFP(MachineBasicBlock::iterator &I) {
// When the source is killed, allocate a scratch FP register.
if (KillsSrc) {
+ duplicatePendingSTBeforeKill(SrcFP, I);
unsigned Slot = getSlot(SrcFP);
unsigned SR = getScratchReg();
PendingST[DstST] = SR;
@@ -1369,6 +1394,15 @@ void FPS::handleSpecialFP(MachineBasicBlock::iterator &I) {
break;
}
+ case TargetOpcode::IMPLICIT_DEF: {
+ // All FP registers must be explicitly defined, so load a 0 instead.
+ unsigned Reg = MI->getOperand(0).getReg() - X86::FP0;
+ DEBUG(dbgs() << "Emitting LD_F0 for implicit FP" << Reg << '\n');
+ BuildMI(*MBB, I, MI->getDebugLoc(), TII->get(X86::LD_F0));
+ pushReg(Reg);
+ break;
+ }
+
case X86::FpPOP_RETVAL: {
// The FpPOP_RETVAL instruction is used after calls that return a value on
// the floating point stack. We cannot model this with ST defs since CALL
diff --git a/contrib/llvm/lib/Target/X86/X86FrameLowering.cpp b/contrib/llvm/lib/Target/X86/X86FrameLowering.cpp
index ed45a9a..d54f4ae 100644
--- a/contrib/llvm/lib/Target/X86/X86FrameLowering.cpp
+++ b/contrib/llvm/lib/Target/X86/X86FrameLowering.cpp
@@ -15,6 +15,7 @@
#include "X86InstrBuilder.h"
#include "X86InstrInfo.h"
#include "X86MachineFunctionInfo.h"
+#include "X86Subtarget.h"
#include "X86TargetMachine.h"
#include "llvm/Function.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
@@ -91,12 +92,12 @@ static unsigned findDeadCallerSavedReg(MachineBasicBlock &MBB,
return 0;
static const unsigned CallerSavedRegs32Bit[] = {
- X86::EAX, X86::EDX, X86::ECX
+ X86::EAX, X86::EDX, X86::ECX, 0
};
static const unsigned CallerSavedRegs64Bit[] = {
X86::RAX, X86::RDX, X86::RCX, X86::RSI, X86::RDI,
- X86::R8, X86::R9, X86::R10, X86::R11
+ X86::R8, X86::R9, X86::R10, X86::R11, 0
};
unsigned Opc = MBBI->getOpcode();
@@ -283,8 +284,8 @@ static bool isEAXLiveIn(MachineFunction &MF) {
}
void X86FrameLowering::emitCalleeSavedFrameMoves(MachineFunction &MF,
- MCSymbol *Label,
- unsigned FramePtr) const {
+ MCSymbol *Label,
+ unsigned FramePtr) const {
MachineFrameInfo *MFI = MF.getFrameInfo();
MachineModuleInfo &MMI = MF.getMMI();
@@ -346,6 +347,247 @@ void X86FrameLowering::emitCalleeSavedFrameMoves(MachineFunction &MF,
}
}
+/// getCompactUnwindRegNum - Get the compact unwind number for a given
+/// register. The number corresponds to the enum lists in
+/// compact_unwind_encoding.h.
+static int getCompactUnwindRegNum(const unsigned *CURegs, unsigned Reg) {
+ int Idx = 1;
+ for (; *CURegs; ++CURegs, ++Idx)
+ if (*CURegs == Reg)
+ return Idx;
+
+ return -1;
+}
+
+/// encodeCompactUnwindRegistersWithoutFrame - Create the permutation encoding
+/// used with frameless stacks. It is passed the number of registers to be saved
+/// and an array of the registers saved.
+static uint32_t encodeCompactUnwindRegistersWithoutFrame(unsigned SavedRegs[6],
+ unsigned RegCount,
+ bool Is64Bit) {
+ // The saved registers are numbered from 1 to 6. In order to encode the order
+ // in which they were saved, we re-number them according to their place in the
+ // register order. The re-numbering is relative to the last re-numbered
+ // register. E.g., if we have registers {6, 2, 4, 5} saved in that order:
+ //
+ // Orig Re-Num
+ // ---- ------
+ // 6 6
+ // 2 2
+ // 4 3
+ // 5 3
+ //
+ static const unsigned CU32BitRegs[] = {
+ X86::EBX, X86::ECX, X86::EDX, X86::EDI, X86::ESI, X86::EBP, 0
+ };
+ static const unsigned CU64BitRegs[] = {
+ X86::RBX, X86::R12, X86::R13, X86::R14, X86::R15, X86::RBP, 0
+ };
+ const unsigned *CURegs = (Is64Bit ? CU64BitRegs : CU32BitRegs);
+
+ uint32_t RenumRegs[6];
+ for (unsigned i = 6 - RegCount; i < 6; ++i) {
+ int CUReg = getCompactUnwindRegNum(CURegs, SavedRegs[i]);
+ if (CUReg == -1) return ~0U;
+ SavedRegs[i] = CUReg;
+
+ unsigned Countless = 0;
+ for (unsigned j = 6 - RegCount; j < i; ++j)
+ if (SavedRegs[j] < SavedRegs[i])
+ ++Countless;
+
+ RenumRegs[i] = SavedRegs[i] - Countless - 1;
+ }
+
+ // Take the renumbered values and encode them into a 10-bit number.
+ uint32_t permutationEncoding = 0;
+ switch (RegCount) {
+ case 6:
+ permutationEncoding |= 120 * RenumRegs[0] + 24 * RenumRegs[1]
+ + 6 * RenumRegs[2] + 2 * RenumRegs[3]
+ + RenumRegs[4];
+ break;
+ case 5:
+ permutationEncoding |= 120 * RenumRegs[1] + 24 * RenumRegs[2]
+ + 6 * RenumRegs[3] + 2 * RenumRegs[4]
+ + RenumRegs[5];
+ break;
+ case 4:
+ permutationEncoding |= 60 * RenumRegs[2] + 12 * RenumRegs[3]
+ + 3 * RenumRegs[4] + RenumRegs[5];
+ break;
+ case 3:
+ permutationEncoding |= 20 * RenumRegs[3] + 4 * RenumRegs[4]
+ + RenumRegs[5];
+ break;
+ case 2:
+ permutationEncoding |= 5 * RenumRegs[4] + RenumRegs[5];
+ break;
+ case 1:
+ permutationEncoding |= RenumRegs[5];
+ break;
+ }
+
+ assert((permutationEncoding & 0x3FF) == permutationEncoding &&
+ "Invalid compact register encoding!");
+ return permutationEncoding;
+}
+
+/// encodeCompactUnwindRegistersWithFrame - Return the registers encoded for a
+/// compact encoding with a frame pointer.
+static uint32_t encodeCompactUnwindRegistersWithFrame(unsigned SavedRegs[6],
+ bool Is64Bit) {
+ static const unsigned CU32BitRegs[] = {
+ X86::EBX, X86::ECX, X86::EDX, X86::EDI, X86::ESI, X86::EBP, 0
+ };
+ static const unsigned CU64BitRegs[] = {
+ X86::RBX, X86::R12, X86::R13, X86::R14, X86::R15, X86::RBP, 0
+ };
+ const unsigned *CURegs = (Is64Bit ? CU64BitRegs : CU32BitRegs);
+
+ // Encode the registers in the order they were saved, 3-bits per register. The
+ // registers are numbered from 1 to 6.
+ uint32_t RegEnc = 0;
+ for (int I = 5; I >= 0; --I) {
+ unsigned Reg = SavedRegs[I];
+ if (Reg == 0) break;
+ int CURegNum = getCompactUnwindRegNum(CURegs, Reg);
+ if (CURegNum == -1)
+ return ~0U;
+ RegEnc |= (CURegNum & 0x7) << (5 - I);
+ }
+
+ assert((RegEnc & 0x7FFF) == RegEnc && "Invalid compact register encoding!");
+ return RegEnc;
+}
+
+uint32_t X86FrameLowering::getCompactUnwindEncoding(MachineFunction &MF) const {
+ const X86RegisterInfo *RegInfo = TM.getRegisterInfo();
+ unsigned FramePtr = RegInfo->getFrameRegister(MF);
+ unsigned StackPtr = RegInfo->getStackRegister();
+
+ X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>();
+ int TailCallReturnAddrDelta = X86FI->getTCReturnAddrDelta();
+
+ bool Is64Bit = STI.is64Bit();
+ bool HasFP = hasFP(MF);
+
+ unsigned SavedRegs[6] = { 0, 0, 0, 0, 0, 0 };
+ int SavedRegIdx = 6;
+
+ unsigned OffsetSize = (Is64Bit ? 8 : 4);
+
+ unsigned PushInstr = (Is64Bit ? X86::PUSH64r : X86::PUSH32r);
+ unsigned PushInstrSize = 1;
+ unsigned MoveInstr = (Is64Bit ? X86::MOV64rr : X86::MOV32rr);
+ unsigned MoveInstrSize = (Is64Bit ? 3 : 2);
+ unsigned SubtractInstr = getSUBriOpcode(Is64Bit, -TailCallReturnAddrDelta);
+ unsigned SubtractInstrIdx = (Is64Bit ? 3 : 2);
+
+ unsigned StackDivide = (Is64Bit ? 8 : 4);
+
+ unsigned InstrOffset = 0;
+ unsigned CFAOffset = 0;
+ unsigned StackAdjust = 0;
+
+ MachineBasicBlock &MBB = MF.front(); // Prologue is in entry BB.
+ bool ExpectEnd = false;
+ for (MachineBasicBlock::iterator
+ MBBI = MBB.begin(), MBBE = MBB.end(); MBBI != MBBE; ++MBBI) {
+ MachineInstr &MI = *MBBI;
+ unsigned Opc = MI.getOpcode();
+ if (Opc == X86::PROLOG_LABEL) continue;
+ if (!MI.getFlag(MachineInstr::FrameSetup)) break;
+
+ // We don't exect any more prolog instructions.
+ if (ExpectEnd) return 0;
+
+ if (Opc == PushInstr) {
+ // If there are too many saved registers, we cannot use compact encoding.
+ if (--SavedRegIdx < 0) return 0;
+
+ SavedRegs[SavedRegIdx] = MI.getOperand(0).getReg();
+ CFAOffset += OffsetSize;
+ InstrOffset += PushInstrSize;
+ } else if (Opc == MoveInstr) {
+ unsigned SrcReg = MI.getOperand(1).getReg();
+ unsigned DstReg = MI.getOperand(0).getReg();
+
+ if (DstReg != FramePtr || SrcReg != StackPtr)
+ return 0;
+
+ CFAOffset = 0;
+ memset(SavedRegs, 0, sizeof(SavedRegs));
+ InstrOffset += MoveInstrSize;
+ } else if (Opc == SubtractInstr) {
+ if (StackAdjust)
+ // We all ready have a stack pointer adjustment.
+ return 0;
+
+ if (!MI.getOperand(0).isReg() ||
+ MI.getOperand(0).getReg() != MI.getOperand(1).getReg() ||
+ MI.getOperand(0).getReg() != StackPtr || !MI.getOperand(2).isImm())
+ // We need this to be a stack adjustment pointer. Something like:
+ //
+ // %RSP<def> = SUB64ri8 %RSP, 48
+ return 0;
+
+ StackAdjust = MI.getOperand(2).getImm() / StackDivide;
+ SubtractInstrIdx += InstrOffset;
+ ExpectEnd = true;
+ }
+ }
+
+ // Encode that we are using EBP/RBP as the frame pointer.
+ uint32_t CompactUnwindEncoding = 0;
+ CFAOffset /= StackDivide;
+ if (HasFP) {
+ if ((CFAOffset & 0xFF) != CFAOffset)
+ // Offset was too big for compact encoding.
+ return 0;
+
+ // Get the encoding of the saved registers when we have a frame pointer.
+ uint32_t RegEnc = encodeCompactUnwindRegistersWithFrame(SavedRegs, Is64Bit);
+ if (RegEnc == ~0U)
+ return 0;
+
+ CompactUnwindEncoding |= 0x01000000;
+ CompactUnwindEncoding |= (CFAOffset & 0xFF) << 16;
+ CompactUnwindEncoding |= RegEnc & 0x7FFF;
+ } else {
+ unsigned FullOffset = CFAOffset + StackAdjust;
+ if ((FullOffset & 0xFF) == FullOffset) {
+ // Frameless stack.
+ CompactUnwindEncoding |= 0x02000000;
+ CompactUnwindEncoding |= (FullOffset & 0xFF) << 16;
+ } else {
+ if ((CFAOffset & 0x7) != CFAOffset)
+ // The extra stack adjustments are too big for us to handle.
+ return 0;
+
+ // Frameless stack with an offset too large for us to encode compactly.
+ CompactUnwindEncoding |= 0x03000000;
+
+ // Encode the offset to the nnnnnn value in the 'subl $nnnnnn, ESP'
+ // instruction.
+ CompactUnwindEncoding |= (SubtractInstrIdx & 0xFF) << 16;
+
+ // Encode any extra stack stack changes (done via push instructions).
+ CompactUnwindEncoding |= (CFAOffset & 0x7) << 13;
+ }
+
+ // Get the encoding of the saved registers when we don't have a frame
+ // pointer.
+ uint32_t RegEnc = encodeCompactUnwindRegistersWithoutFrame(SavedRegs,
+ 6 - SavedRegIdx,
+ Is64Bit);
+ if (RegEnc == ~0U) return 0;
+ CompactUnwindEncoding |= RegEnc & 0x3FF;
+ }
+
+ return CompactUnwindEncoding;
+}
+
/// emitPrologue - Push callee-saved registers onto the stack, which
/// automatically adjust the stack pointer. Adjust the stack pointer to allocate
/// space for local variables. Also emit labels used by the exception handler to
@@ -370,7 +612,6 @@ void X86FrameLowering::emitPrologue(MachineFunction &MF) const {
unsigned SlotSize = RegInfo->getSlotSize();
unsigned FramePtr = RegInfo->getFrameRegister(MF);
unsigned StackPtr = RegInfo->getStackRegister();
-
DebugLoc DL;
// If we're forcing a stack realignment we can't rely on just the frame
@@ -398,7 +639,8 @@ void X86FrameLowering::emitPrologue(MachineFunction &MF) const {
!RegInfo->needsStackRealignment(MF) &&
!MFI->hasVarSizedObjects() && // No dynamic alloca.
!MFI->adjustsStack() && // No calls.
- !IsWin64) { // Win64 has no Red Zone
+ !IsWin64 && // Win64 has no Red Zone
+ !EnableSegmentedStacks) { // Regular stack
uint64_t MinSize = X86FI->getCalleeSavedFrameSize();
if (HasFP) MinSize += SlotSize;
StackSize = std::max(MinSize, StackSize > 128 ? StackSize - 128 : 0);
@@ -459,7 +701,8 @@ void X86FrameLowering::emitPrologue(MachineFunction &MF) const {
if (needsFrameMoves) {
// Mark the place where EBP/RBP was saved.
MCSymbol *FrameLabel = MMI.getContext().CreateTempSymbol();
- BuildMI(MBB, MBBI, DL, TII.get(X86::PROLOG_LABEL)).addSym(FrameLabel);
+ BuildMI(MBB, MBBI, DL, TII.get(X86::PROLOG_LABEL))
+ .addSym(FrameLabel);
// Define the current CFA rule to use the provided offset.
if (StackSize) {
@@ -478,7 +721,7 @@ void X86FrameLowering::emitPrologue(MachineFunction &MF) const {
Moves.push_back(MachineMove(FrameLabel, FPDst, FPSrc));
}
- // Update EBP with the new base value...
+ // Update EBP with the new base value.
BuildMI(MBB, MBBI, DL,
TII.get(Is64Bit ? X86::MOV64rr : X86::MOV32rr), FramePtr)
.addReg(StackPtr)
@@ -487,7 +730,8 @@ void X86FrameLowering::emitPrologue(MachineFunction &MF) const {
if (needsFrameMoves) {
// Mark effective beginning of when frame pointer becomes valid.
MCSymbol *FrameLabel = MMI.getContext().CreateTempSymbol();
- BuildMI(MBB, MBBI, DL, TII.get(X86::PROLOG_LABEL)).addSym(FrameLabel);
+ BuildMI(MBB, MBBI, DL, TII.get(X86::PROLOG_LABEL))
+ .addSym(FrameLabel);
// Define the current CFA to use the EBP/RBP register.
MachineLocation FPDst(FramePtr);
@@ -504,8 +748,10 @@ void X86FrameLowering::emitPrologue(MachineFunction &MF) const {
if (RegInfo->needsStackRealignment(MF)) {
MachineInstr *MI =
BuildMI(MBB, MBBI, DL,
- TII.get(Is64Bit ? X86::AND64ri32 : X86::AND32ri),
- StackPtr).addReg(StackPtr).addImm(-MaxAlign);
+ TII.get(Is64Bit ? X86::AND64ri32 : X86::AND32ri), StackPtr)
+ .addReg(StackPtr)
+ .addImm(-MaxAlign)
+ .setMIFlag(MachineInstr::FrameSetup);
// The EFLAGS implicit def is dead.
MI->getOperand(3).setIsDead();
@@ -522,6 +768,7 @@ void X86FrameLowering::emitPrologue(MachineFunction &MF) const {
(MBBI->getOpcode() == X86::PUSH32r ||
MBBI->getOpcode() == X86::PUSH64r)) {
PushedRegs = true;
+ MBBI->setFlag(MachineInstr::FrameSetup);
++MBBI;
if (!HasFP && needsFrameMoves) {
@@ -530,8 +777,7 @@ void X86FrameLowering::emitPrologue(MachineFunction &MF) const {
BuildMI(MBB, MBBI, DL, TII.get(X86::PROLOG_LABEL)).addSym(Label);
// Define the current CFA rule to use the provided offset.
- unsigned Ptr = StackSize ?
- MachineLocation::VirtualFP : StackPtr;
+ unsigned Ptr = StackSize ? MachineLocation::VirtualFP : StackPtr;
MachineLocation SPDst(Ptr);
MachineLocation SPSrc(Ptr, StackOffset);
Moves.push_back(MachineMove(Label, SPDst, SPSrc));
@@ -586,26 +832,30 @@ void X86FrameLowering::emitPrologue(MachineFunction &MF) const {
// Save EAX
BuildMI(MBB, MBBI, DL, TII.get(X86::PUSH32r))
- .addReg(X86::EAX, RegState::Kill);
+ .addReg(X86::EAX, RegState::Kill)
+ .setMIFlag(MachineInstr::FrameSetup);
}
if (Is64Bit) {
// Handle the 64-bit Windows ABI case where we need to call __chkstk.
// Function prologue is responsible for adjusting the stack pointer.
BuildMI(MBB, MBBI, DL, TII.get(X86::MOV64ri), X86::RAX)
- .addImm(NumBytes);
+ .addImm(NumBytes)
+ .setMIFlag(MachineInstr::FrameSetup);
} else {
// Allocate NumBytes-4 bytes on stack in case of isEAXAlive.
// We'll also use 4 already allocated bytes for EAX.
BuildMI(MBB, MBBI, DL, TII.get(X86::MOV32ri), X86::EAX)
- .addImm(isEAXAlive ? NumBytes - 4 : NumBytes);
+ .addImm(isEAXAlive ? NumBytes - 4 : NumBytes)
+ .setMIFlag(MachineInstr::FrameSetup);
}
BuildMI(MBB, MBBI, DL,
TII.get(Is64Bit ? X86::W64ALLOCA : X86::CALLpcrel32))
.addExternalSymbol(StackProbeSymbol)
.addReg(StackPtr, RegState::Define | RegState::Implicit)
- .addReg(X86::EFLAGS, RegState::Define | RegState::Implicit);
+ .addReg(X86::EFLAGS, RegState::Define | RegState::Implicit)
+ .setMIFlag(MachineInstr::FrameSetup);
// MSVC x64's __chkstk needs to adjust %rsp.
// FIXME: %rax preserves the offset and should be available.
@@ -618,6 +868,7 @@ void X86FrameLowering::emitPrologue(MachineFunction &MF) const {
MachineInstr *MI = addRegOffset(BuildMI(MF, DL, TII.get(X86::MOV32rm),
X86::EAX),
StackPtr, false, NumBytes - 4);
+ MI->setFlag(MachineInstr::FrameSetup);
MBB.insert(MBBI, MI);
}
} else if (NumBytes)
@@ -627,7 +878,8 @@ void X86FrameLowering::emitPrologue(MachineFunction &MF) const {
if (( (!HasFP && NumBytes) || PushedRegs) && needsFrameMoves) {
// Mark end of stack pointer adjustment.
MCSymbol *Label = MMI.getContext().CreateTempSymbol();
- BuildMI(MBB, MBBI, DL, TII.get(X86::PROLOG_LABEL)).addSym(Label);
+ BuildMI(MBB, MBBI, DL, TII.get(X86::PROLOG_LABEL))
+ .addSym(Label);
if (!HasFP && NumBytes) {
// Define the current CFA rule to use the provided offset.
@@ -647,6 +899,11 @@ void X86FrameLowering::emitPrologue(MachineFunction &MF) const {
if (PushedRegs)
emitCalleeSavedFrameMoves(MF, Label, HasFP ? FramePtr : StackPtr);
}
+
+ // Darwin 10.7 and greater has support for compact unwind encoding.
+ if (STI.getTargetTriple().isMacOSX() &&
+ !STI.getTargetTriple().isMacOSXVersionLT(10, 7))
+ MMI.setCompactUnwindEncoding(getCompactUnwindEncoding(MF));
}
void X86FrameLowering::emitEpilogue(MachineFunction &MF,
@@ -844,23 +1101,6 @@ void X86FrameLowering::emitEpilogue(MachineFunction &MF,
}
}
-void
-X86FrameLowering::getInitialFrameState(std::vector<MachineMove> &Moves) const {
- // Calculate amount of bytes used for return address storing
- int stackGrowth = (STI.is64Bit() ? -8 : -4);
- const X86RegisterInfo *RI = TM.getRegisterInfo();
-
- // Initial state of the frame pointer is esp+stackGrowth.
- MachineLocation Dst(MachineLocation::VirtualFP);
- MachineLocation Src(RI->getStackRegister(), stackGrowth);
- Moves.push_back(MachineMove(0, Dst, Src));
-
- // Add return address to move list
- MachineLocation CSDst(RI->getStackRegister(), stackGrowth);
- MachineLocation CSSrc(RI->getRARegister());
- Moves.push_back(MachineMove(0, CSDst, CSSrc));
-}
-
int X86FrameLowering::getFrameIndexOffset(const MachineFunction &MF, int FI) const {
const X86RegisterInfo *RI =
static_cast<const X86RegisterInfo*>(MF.getTarget().getRegisterInfo());
@@ -873,9 +1113,7 @@ int X86FrameLowering::getFrameIndexOffset(const MachineFunction &MF, int FI) con
// Skip the saved EBP.
Offset += RI->getSlotSize();
} else {
- unsigned Align = MFI->getObjectAlignment(FI);
- assert((-(Offset + StackSize)) % Align == 0);
- Align = 0;
+ assert((-(Offset + StackSize)) % MFI->getObjectAlignment(FI) == 0);
return Offset + StackSize;
}
// FIXME: Support tail calls
@@ -1027,184 +1265,183 @@ X86FrameLowering::processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
true);
assert(FrameIdx == MFI->getObjectIndexBegin() &&
"Slot for EBP register must be last in order to be found!");
- FrameIdx = 0;
+ (void)FrameIdx;
}
}
-/// permuteEncode - Create the permutation encoding used with frameless
-/// stacks. It is passed the number of registers to be saved and an array of the
-/// registers saved.
-static uint32_t permuteEncode(unsigned SavedCount, unsigned Registers[6]) {
- // The saved registers are numbered from 1 to 6. In order to encode the order
- // in which they were saved, we re-number them according to their place in the
- // register order. The re-numbering is relative to the last re-numbered
- // register. E.g., if we have registers {6, 2, 4, 5} saved in that order:
- //
- // Orig Re-Num
- // ---- ------
- // 6 6
- // 2 2
- // 4 3
- // 5 3
- //
- bool Used[7] = { false, false, false, false, false, false, false };
- uint32_t RenumRegs[6];
- for (unsigned I = 0; I < SavedCount; ++I) {
- uint32_t Renum = 0;
- for (unsigned U = 1; U < 7; ++U) {
- if (U == Registers[I])
- break;
- if (!Used[U])
- ++Renum;
- }
-
- Used[Registers[I]] = true;
- RenumRegs[I] = Renum;
+static bool
+HasNestArgument(const MachineFunction *MF) {
+ const Function *F = MF->getFunction();
+ for (Function::const_arg_iterator I = F->arg_begin(), E = F->arg_end();
+ I != E; I++) {
+ if (I->hasNestAttr())
+ return true;
}
+ return false;
+}
- // Take the renumbered values and encode them into a 10-bit number.
- uint32_t permutationEncoding = 0;
- switch (SavedCount) {
- case 6:
- permutationEncoding |= 120 * RenumRegs[0] + 24 * RenumRegs[1]
- + 6 * RenumRegs[2] + 2 * RenumRegs[3]
- + RenumRegs[4];
- break;
- case 5:
- permutationEncoding |= 120 * RenumRegs[0] + 24 * RenumRegs[1]
- + 6 * RenumRegs[2] + 2 * RenumRegs[3]
- + RenumRegs[4];
- break;
- case 4:
- permutationEncoding |= 60 * RenumRegs[0] + 12 * RenumRegs[1]
- + 3 * RenumRegs[2] + RenumRegs[3];
- break;
- case 3:
- permutationEncoding |= 20 * RenumRegs[0] + 4 * RenumRegs[1]
- + RenumRegs[2];
- break;
- case 2:
- permutationEncoding |= 5 * RenumRegs[0] + RenumRegs[1];
- break;
- case 1:
- permutationEncoding |= RenumRegs[0];
- break;
+static unsigned
+GetScratchRegister(bool Is64Bit, const MachineFunction &MF) {
+ if (Is64Bit) {
+ return X86::R11;
+ } else {
+ CallingConv::ID CallingConvention = MF.getFunction()->getCallingConv();
+ bool IsNested = HasNestArgument(&MF);
+
+ if (CallingConvention == CallingConv::X86_FastCall) {
+ if (IsNested) {
+ report_fatal_error("Segmented stacks does not support fastcall with "
+ "nested function.");
+ return -1;
+ } else {
+ return X86::EAX;
+ }
+ } else {
+ if (IsNested)
+ return X86::EDX;
+ else
+ return X86::ECX;
+ }
}
-
- return permutationEncoding;
}
-uint32_t X86FrameLowering::
-getCompactUnwindEncoding(ArrayRef<MCCFIInstruction> Instrs,
- int DataAlignmentFactor, bool IsEH) const {
- uint32_t Encoding = 0;
- int CFAOffset = 0;
- const TargetRegisterInfo *TRI = TM.getRegisterInfo();
- unsigned SavedRegs[6] = { 0, 0, 0, 0, 0, 0 };
- unsigned SavedRegIdx = 0;
- int FramePointerReg = -1;
+void
+X86FrameLowering::adjustForSegmentedStacks(MachineFunction &MF) const {
+ MachineBasicBlock &prologueMBB = MF.front();
+ MachineFrameInfo *MFI = MF.getFrameInfo();
+ const X86InstrInfo &TII = *TM.getInstrInfo();
+ uint64_t StackSize;
+ bool Is64Bit = STI.is64Bit();
+ unsigned TlsReg, TlsOffset;
+ DebugLoc DL;
+ const X86Subtarget *ST = &MF.getTarget().getSubtarget<X86Subtarget>();
- for (ArrayRef<MCCFIInstruction>::const_iterator
- I = Instrs.begin(), E = Instrs.end(); I != E; ++I) {
- const MCCFIInstruction &Inst = *I;
- MCSymbol *Label = Inst.getLabel();
+ unsigned ScratchReg = GetScratchRegister(Is64Bit, MF);
+ assert(!MF.getRegInfo().isLiveIn(ScratchReg) &&
+ "Scratch register is live-in");
- // Ignore invalid labels.
- if (Label && !Label->isDefined()) continue;
+ if (MF.getFunction()->isVarArg())
+ report_fatal_error("Segmented stacks do not support vararg functions.");
+ if (!ST->isTargetLinux())
+ report_fatal_error("Segmented stacks supported only on linux.");
- unsigned Operation = Inst.getOperation();
- if (Operation != MCCFIInstruction::Move &&
- Operation != MCCFIInstruction::RelMove)
- // FIXME: We can't handle this frame just yet.
- return 0;
-
- const MachineLocation &Dst = Inst.getDestination();
- const MachineLocation &Src = Inst.getSource();
- const bool IsRelative = (Operation == MCCFIInstruction::RelMove);
-
- if (Dst.isReg() && Dst.getReg() == MachineLocation::VirtualFP) {
- if (Src.getReg() != MachineLocation::VirtualFP) {
- // DW_CFA_def_cfa
- assert(FramePointerReg == -1 &&"Defining more than one frame pointer?");
- if (TRI->getLLVMRegNum(Src.getReg(), IsEH) != X86::EBP &&
- TRI->getLLVMRegNum(Src.getReg(), IsEH) != X86::RBP)
- // The frame pointer isn't EBP/RBP. Cannot make unwind information
- // compact.
- return 0;
- FramePointerReg = TRI->getCompactUnwindRegNum(Src.getReg(), IsEH);
- } // else DW_CFA_def_cfa_offset
-
- if (IsRelative)
- CFAOffset += Src.getOffset();
- else
- CFAOffset -= Src.getOffset();
+ MachineBasicBlock *allocMBB = MF.CreateMachineBasicBlock();
+ MachineBasicBlock *checkMBB = MF.CreateMachineBasicBlock();
+ X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>();
+ bool IsNested = false;
- continue;
- }
+ // We need to know if the function has a nest argument only in 64 bit mode.
+ if (Is64Bit)
+ IsNested = HasNestArgument(&MF);
- if (Src.isReg() && Src.getReg() == MachineLocation::VirtualFP) {
- // DW_CFA_def_cfa_register
- assert(FramePointerReg == -1 && "Defining more than one frame pointer?");
+ // The MOV R10, RAX needs to be in a different block, since the RET we emit in
+ // allocMBB needs to be last (terminating) instruction.
+ MachineBasicBlock *restoreR10MBB = NULL;
+ if (IsNested)
+ restoreR10MBB = MF.CreateMachineBasicBlock();
- if (TRI->getLLVMRegNum(Dst.getReg(), IsEH) != X86::EBP &&
- TRI->getLLVMRegNum(Dst.getReg(), IsEH) != X86::RBP)
- // The frame pointer isn't EBP/RBP. Cannot make unwind information
- // compact.
- return 0;
+ for (MachineBasicBlock::livein_iterator i = prologueMBB.livein_begin(),
+ e = prologueMBB.livein_end(); i != e; i++) {
+ allocMBB->addLiveIn(*i);
+ checkMBB->addLiveIn(*i);
- FramePointerReg = TRI->getCompactUnwindRegNum(Dst.getReg(), IsEH);
- if (SavedRegIdx != 1 || SavedRegs[0] != unsigned(FramePointerReg))
- return 0;
+ if (IsNested)
+ restoreR10MBB->addLiveIn(*i);
+ }
- SavedRegs[0] = 0;
- SavedRegIdx = 0;
- continue;
- }
+ if (IsNested) {
+ allocMBB->addLiveIn(X86::R10);
+ restoreR10MBB->addLiveIn(X86::RAX);
+ }
- unsigned Reg = Src.getReg();
- int Offset = Dst.getOffset();
- if (IsRelative)
- Offset -= CFAOffset;
- Offset /= DataAlignmentFactor;
+ if (IsNested)
+ MF.push_front(restoreR10MBB);
+ MF.push_front(allocMBB);
+ MF.push_front(checkMBB);
+
+ // Eventually StackSize will be calculated by a link-time pass; which will
+ // also decide whether checking code needs to be injected into this particular
+ // prologue.
+ StackSize = MFI->getStackSize();
+
+ // Read the limit off the current stacklet off the stack_guard location.
+ if (Is64Bit) {
+ TlsReg = X86::FS;
+ TlsOffset = 0x70;
+
+ BuildMI(checkMBB, DL, TII.get(X86::LEA64r), ScratchReg).addReg(X86::RSP)
+ .addImm(0).addReg(0).addImm(-StackSize).addReg(0);
+ BuildMI(checkMBB, DL, TII.get(X86::CMP64rm)).addReg(ScratchReg)
+ .addReg(0).addImm(0).addReg(0).addImm(TlsOffset).addReg(TlsReg);
+ } else {
+ TlsReg = X86::GS;
+ TlsOffset = 0x30;
- if (Offset < 0) {
- // FIXME: Handle?
- // DW_CFA_offset_extended_sf
- return 0;
- } else if (Reg < 64) {
- // DW_CFA_offset + Reg
- if (SavedRegIdx >= 6) return 0;
- int CURegNum = TRI->getCompactUnwindRegNum(Reg, IsEH);
- if (CURegNum == -1) return 0;
- SavedRegs[SavedRegIdx++] = CURegNum;
- } else {
- // FIXME: Handle?
- // DW_CFA_offset_extended
- return 0;
- }
+ BuildMI(checkMBB, DL, TII.get(X86::LEA32r), ScratchReg).addReg(X86::ESP)
+ .addImm(0).addReg(0).addImm(-StackSize).addReg(0);
+ BuildMI(checkMBB, DL, TII.get(X86::CMP32rm)).addReg(ScratchReg)
+ .addReg(0).addImm(0).addReg(0).addImm(TlsOffset).addReg(TlsReg);
}
- // Bail if there are too many registers to encode.
- if (SavedRegIdx > 6) return 0;
+ // This jump is taken if SP >= (Stacklet Limit + Stack Space required).
+ // It jumps to normal execution of the function body.
+ BuildMI(checkMBB, DL, TII.get(X86::JG_4)).addMBB(&prologueMBB);
+
+ // On 32 bit we first push the arguments size and then the frame size. On 64
+ // bit, we pass the stack frame size in r10 and the argument size in r11.
+ if (Is64Bit) {
+ // Functions with nested arguments use R10, so it needs to be saved across
+ // the call to _morestack
+
+ if (IsNested)
+ BuildMI(allocMBB, DL, TII.get(X86::MOV64rr), X86::RAX).addReg(X86::R10);
+
+ BuildMI(allocMBB, DL, TII.get(X86::MOV64ri), X86::R10)
+ .addImm(StackSize);
+ BuildMI(allocMBB, DL, TII.get(X86::MOV64ri), X86::R11)
+ .addImm(X86FI->getArgumentStackSize());
+ MF.getRegInfo().setPhysRegUsed(X86::R10);
+ MF.getRegInfo().setPhysRegUsed(X86::R11);
+ } else {
+ // Since we'll call __morestack, stack alignment needs to be preserved.
+ BuildMI(allocMBB, DL, TII.get(X86::SUB32ri), X86::ESP).addReg(X86::ESP)
+ .addImm(8);
+ BuildMI(allocMBB, DL, TII.get(X86::PUSHi32))
+ .addImm(X86FI->getArgumentStackSize());
+ BuildMI(allocMBB, DL, TII.get(X86::PUSHi32))
+ .addImm(StackSize);
+ }
- // Check if the offset is too big.
- CFAOffset /= 4;
- if ((CFAOffset & 0xFF) != CFAOffset)
- return 0;
- Encoding |= (CFAOffset & 0xFF) << 16; // Size encoding.
-
- if (FramePointerReg != -1) {
- Encoding |= 0x01000000; // EBP/RBP Unwind Frame
- for (unsigned I = 0; I != SavedRegIdx; ++I) {
- unsigned Reg = SavedRegs[I];
- if (Reg == unsigned(FramePointerReg)) continue;
- Encoding |= (Reg & 0x7) << (I * 3); // Register encoding
- }
+ // __morestack is in libgcc
+ if (Is64Bit)
+ BuildMI(allocMBB, DL, TII.get(X86::CALL64pcrel32))
+ .addExternalSymbol("__morestack");
+ else
+ BuildMI(allocMBB, DL, TII.get(X86::CALLpcrel32))
+ .addExternalSymbol("__morestack");
+
+ // __morestack only seems to remove 8 bytes off the stack. Add back the
+ // additional 8 bytes we added before pushing the arguments.
+ if (!Is64Bit)
+ BuildMI(allocMBB, DL, TII.get(X86::ADD32ri), X86::ESP).addReg(X86::ESP)
+ .addImm(8);
+ BuildMI(allocMBB, DL, TII.get(X86::RET));
+
+ if (IsNested)
+ BuildMI(restoreR10MBB, DL, TII.get(X86::MOV64rr), X86::R10)
+ .addReg(X86::RAX);
+
+ if (IsNested) {
+ allocMBB->addSuccessor(restoreR10MBB);
+ restoreR10MBB->addSuccessor(&prologueMBB);
} else {
- Encoding |= 0x02000000; // Frameless unwind with small stack
- Encoding |= (SavedRegIdx & 0x7) << 10;
- Encoding |= permuteEncode(SavedRegIdx, SavedRegs);
+ allocMBB->addSuccessor(&prologueMBB);
}
- return Encoding;
+ checkMBB->addSuccessor(allocMBB);
+ checkMBB->addSuccessor(&prologueMBB);
+
+#ifdef XDEBUG
+ MF.verify();
+#endif
}
diff --git a/contrib/llvm/lib/Target/X86/X86FrameLowering.h b/contrib/llvm/lib/Target/X86/X86FrameLowering.h
index 14c31ed..6f49064 100644
--- a/contrib/llvm/lib/Target/X86/X86FrameLowering.h
+++ b/contrib/llvm/lib/Target/X86/X86FrameLowering.h
@@ -41,6 +41,8 @@ public:
void emitPrologue(MachineFunction &MF) const;
void emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const;
+ void adjustForSegmentedStacks(MachineFunction &MF) const;
+
void processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
RegScavenger *RS = NULL) const;
@@ -57,11 +59,8 @@ public:
bool hasFP(const MachineFunction &MF) const;
bool hasReservedCallFrame(const MachineFunction &MF) const;
- void getInitialFrameState(std::vector<MachineMove> &Moves) const;
int getFrameIndexOffset(const MachineFunction &MF, int FI) const;
-
- uint32_t getCompactUnwindEncoding(ArrayRef<MCCFIInstruction> Instrs,
- int DataAlignmentFactor, bool IsEH) const;
+ uint32_t getCompactUnwindEncoding(MachineFunction &MF) const;
};
} // End llvm namespace
diff --git a/contrib/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp b/contrib/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp
index 2b0f283..02b0ff2 100644
--- a/contrib/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp
+++ b/contrib/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp
@@ -474,10 +474,15 @@ void X86DAGToDAGISel::PreprocessISelDAG() {
if (N->getOpcode() != ISD::FP_ROUND && N->getOpcode() != ISD::FP_EXTEND)
continue;
- // If the source and destination are SSE registers, then this is a legal
- // conversion that should not be lowered.
EVT SrcVT = N->getOperand(0).getValueType();
EVT DstVT = N->getValueType(0);
+
+ // If any of the sources are vectors, no fp stack involved.
+ if (SrcVT.isVector() || DstVT.isVector())
+ continue;
+
+ // If the source and destination are SSE registers, then this is a legal
+ // conversion that should not be lowered.
bool SrcIsSSE = X86Lowering.isScalarFPTypeInSSEReg(SrcVT);
bool DstIsSSE = X86Lowering.isScalarFPTypeInSSEReg(DstVT);
if (SrcIsSSE && DstIsSSE)
@@ -2168,9 +2173,10 @@ SDNode *X86DAGToDAGISel::Select(SDNode *Node) {
SDValue Subreg = CurDAG->getTargetExtractSubreg(X86::sub_8bit_hi, dl,
MVT::i8, Reg);
- // Emit a testb. No special NOREX tricks are needed since there's
- // only one GPR operand!
- return CurDAG->getMachineNode(X86::TEST8ri, dl, MVT::i32,
+ // Emit a testb. The EXTRACT_SUBREG becomes a COPY that can only
+ // target GR8_NOREX registers, so make sure the register class is
+ // forced.
+ return CurDAG->getMachineNode(X86::TEST8ri_NOREX, dl, MVT::i32,
Subreg, ShiftedImm);
}
diff --git a/contrib/llvm/lib/Target/X86/X86ISelLowering.cpp b/contrib/llvm/lib/Target/X86/X86ISelLowering.cpp
index 5096d9a..7c8ce17 100644
--- a/contrib/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/contrib/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -51,6 +51,7 @@
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetOptions.h"
using namespace llvm;
using namespace dwarf;
@@ -71,9 +72,6 @@ static SDValue Extract128BitVector(SDValue Vec,
SelectionDAG &DAG,
DebugLoc dl);
-static SDValue ConcatVectors(SDValue Lower, SDValue Upper, SelectionDAG &DAG);
-
-
/// Generate a DAG to grab 128-bits from a vector > 128 bits. This
/// sets things up to match to an AVX VEXTRACTF128 instruction or a
/// simple subregister reference. Idx is an index in the 128 bits we
@@ -85,14 +83,10 @@ static SDValue Extract128BitVector(SDValue Vec,
DebugLoc dl) {
EVT VT = Vec.getValueType();
assert(VT.getSizeInBits() == 256 && "Unexpected vector size!");
-
EVT ElVT = VT.getVectorElementType();
-
- int Factor = VT.getSizeInBits() / 128;
-
- EVT ResultVT = EVT::getVectorVT(*DAG.getContext(),
- ElVT,
- VT.getVectorNumElements() / Factor);
+ int Factor = VT.getSizeInBits()/128;
+ EVT ResultVT = EVT::getVectorVT(*DAG.getContext(), ElVT,
+ VT.getVectorNumElements()/Factor);
// Extract from UNDEF is UNDEF.
if (Vec.getOpcode() == ISD::UNDEF)
@@ -111,7 +105,6 @@ static SDValue Extract128BitVector(SDValue Vec,
* ElemsPerChunk);
SDValue VecIdx = DAG.getConstant(NormalizedIdxVal, MVT::i32);
-
SDValue Result = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, ResultVT, Vec,
VecIdx);
@@ -136,21 +129,18 @@ static SDValue Insert128BitVector(SDValue Result,
assert(VT.getSizeInBits() == 128 && "Unexpected vector size!");
EVT ElVT = VT.getVectorElementType();
-
unsigned IdxVal = cast<ConstantSDNode>(Idx)->getZExtValue();
-
EVT ResultVT = Result.getValueType();
// Insert the relevant 128 bits.
- unsigned ElemsPerChunk = 128 / ElVT.getSizeInBits();
+ unsigned ElemsPerChunk = 128/ElVT.getSizeInBits();
// This is the index of the first element of the 128-bit chunk
// we want.
- unsigned NormalizedIdxVal = (((IdxVal * ElVT.getSizeInBits()) / 128)
+ unsigned NormalizedIdxVal = (((IdxVal * ElVT.getSizeInBits())/128)
* ElemsPerChunk);
SDValue VecIdx = DAG.getConstant(NormalizedIdxVal, MVT::i32);
-
Result = DAG.getNode(ISD::INSERT_SUBVECTOR, dl, ResultVT, Result, Vec,
VecIdx);
return Result;
@@ -159,34 +149,6 @@ static SDValue Insert128BitVector(SDValue Result,
return SDValue();
}
-/// Given two vectors, concat them.
-static SDValue ConcatVectors(SDValue Lower, SDValue Upper, SelectionDAG &DAG) {
- DebugLoc dl = Lower.getDebugLoc();
-
- assert(Lower.getValueType() == Upper.getValueType() && "Mismatched vectors!");
-
- EVT VT = EVT::getVectorVT(*DAG.getContext(),
- Lower.getValueType().getVectorElementType(),
- Lower.getValueType().getVectorNumElements() * 2);
-
- // TODO: Generalize to arbitrary vector length (this assumes 256-bit vectors).
- assert(VT.getSizeInBits() == 256 && "Unsupported vector concat!");
-
- // Insert the upper subvector.
- SDValue Vec = Insert128BitVector(DAG.getNode(ISD::UNDEF, dl, VT), Upper,
- DAG.getConstant(
- // This is half the length of the result
- // vector. Start inserting the upper 128
- // bits here.
- Lower.getValueType().getVectorNumElements(),
- MVT::i32),
- DAG, dl);
-
- // Insert the lower subvector.
- Vec = Insert128BitVector(Vec, Lower, DAG.getConstant(0, MVT::i32), DAG, dl);
- return Vec;
-}
-
static TargetLoweringObjectFile *createTLOF(X86TargetMachine &TM) {
const X86Subtarget *Subtarget = &TM.getSubtarget<X86Subtarget>();
bool is64Bit = Subtarget->is64Bit();
@@ -197,11 +159,8 @@ static TargetLoweringObjectFile *createTLOF(X86TargetMachine &TM) {
return new TargetLoweringObjectFileMachO();
}
- if (Subtarget->isTargetELF()) {
- if (is64Bit)
- return new X8664_ELFTargetObjectFile(TM);
- return new X8632_ELFTargetObjectFile(TM);
- }
+ if (Subtarget->isTargetELF())
+ return new TargetLoweringObjectFileELF();
if (Subtarget->isTargetCOFF() && !Subtarget->isTargetEnvMacho())
return new TargetLoweringObjectFileCOFF();
llvm_unreachable("unknown subtarget type");
@@ -222,6 +181,8 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM)
// X86 is weird, it always uses i8 for shift amounts and setcc results.
setBooleanContents(ZeroOrOneBooleanContent);
+ // X86-SSE is even stranger. It uses -1 or 0 for vector masks.
+ setBooleanVectorContents(ZeroOrNegativeOneBooleanContent);
// For 64-bit since we have so many registers use the ILP scheduler, for
// 32-bit code use the register pressure specific scheduling.
@@ -354,7 +315,8 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM)
setOperationAction(ISD::FP_TO_UINT , MVT::i64 , Expand);
setOperationAction(ISD::FP_TO_UINT , MVT::i32 , Promote);
} else if (!UseSoftFloat) {
- if (X86ScalarSSEf32 && !Subtarget->hasSSE3())
+ // Since AVX is a superset of SSE3, only check for SSE here.
+ if (Subtarget->hasSSE1() && !Subtarget->hasSSE3())
// Expand FP_TO_UINT into a select.
// FIXME: We would like to use a Custom expander here eventually to do
// the optimal thing for SSE vs. the default expansion in the legalizer.
@@ -417,15 +379,24 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM)
setOperationAction(ISD::FREM , MVT::f80 , Expand);
setOperationAction(ISD::FLT_ROUNDS_ , MVT::i32 , Custom);
- setOperationAction(ISD::CTTZ , MVT::i8 , Custom);
- setOperationAction(ISD::CTLZ , MVT::i8 , Custom);
- setOperationAction(ISD::CTTZ , MVT::i16 , Custom);
- setOperationAction(ISD::CTLZ , MVT::i16 , Custom);
- setOperationAction(ISD::CTTZ , MVT::i32 , Custom);
- setOperationAction(ISD::CTLZ , MVT::i32 , Custom);
- if (Subtarget->is64Bit()) {
- setOperationAction(ISD::CTTZ , MVT::i64 , Custom);
- setOperationAction(ISD::CTLZ , MVT::i64 , Custom);
+ if (Subtarget->hasBMI()) {
+ setOperationAction(ISD::CTTZ , MVT::i8 , Promote);
+ } else {
+ setOperationAction(ISD::CTTZ , MVT::i8 , Custom);
+ setOperationAction(ISD::CTTZ , MVT::i16 , Custom);
+ setOperationAction(ISD::CTTZ , MVT::i32 , Custom);
+ if (Subtarget->is64Bit())
+ setOperationAction(ISD::CTTZ , MVT::i64 , Custom);
+ }
+
+ if (Subtarget->hasLZCNT()) {
+ setOperationAction(ISD::CTLZ , MVT::i8 , Promote);
+ } else {
+ setOperationAction(ISD::CTLZ , MVT::i8 , Custom);
+ setOperationAction(ISD::CTLZ , MVT::i16 , Custom);
+ setOperationAction(ISD::CTLZ , MVT::i32 , Custom);
+ if (Subtarget->is64Bit())
+ setOperationAction(ISD::CTLZ , MVT::i64 , Custom);
}
if (Subtarget->hasPOPCNT()) {
@@ -491,8 +462,8 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM)
if (Subtarget->hasXMM())
setOperationAction(ISD::PREFETCH , MVT::Other, Legal);
- // We may not have a libcall for MEMBARRIER so we should lower this.
setOperationAction(ISD::MEMBARRIER , MVT::Other, Custom);
+ setOperationAction(ISD::ATOMIC_FENCE , MVT::Other, Custom);
// On X86 and X86-64, atomic operations are lowered to locked instructions.
// Locked instructions, in turn, have implicit fence semantics (all memory
@@ -506,9 +477,11 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM)
MVT VT = IntVTs[i];
setOperationAction(ISD::ATOMIC_CMP_SWAP, VT, Custom);
setOperationAction(ISD::ATOMIC_LOAD_SUB, VT, Custom);
+ setOperationAction(ISD::ATOMIC_STORE, VT, Custom);
}
if (!Subtarget->is64Bit()) {
+ setOperationAction(ISD::ATOMIC_LOAD, MVT::i64, Custom);
setOperationAction(ISD::ATOMIC_LOAD_ADD, MVT::i64, Custom);
setOperationAction(ISD::ATOMIC_LOAD_SUB, MVT::i64, Custom);
setOperationAction(ISD::ATOMIC_LOAD_AND, MVT::i64, Custom);
@@ -518,6 +491,10 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM)
setOperationAction(ISD::ATOMIC_SWAP, MVT::i64, Custom);
}
+ if (Subtarget->hasCmpxchg16b()) {
+ setOperationAction(ISD::ATOMIC_CMP_SWAP, MVT::i128, Custom);
+ }
+
// FIXME - use subtarget debug flags
if (!Subtarget->isTargetDarwin() &&
!Subtarget->isTargetELF() &&
@@ -539,7 +516,8 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM)
setOperationAction(ISD::FRAME_TO_ARGS_OFFSET, MVT::i32, Custom);
setOperationAction(ISD::FRAME_TO_ARGS_OFFSET, MVT::i64, Custom);
- setOperationAction(ISD::TRAMPOLINE, MVT::Other, Custom);
+ setOperationAction(ISD::INIT_TRAMPOLINE, MVT::Other, Custom);
+ setOperationAction(ISD::ADJUST_TRAMPOLINE, MVT::Other, Custom);
setOperationAction(ISD::TRAP, MVT::Other, Legal);
@@ -556,11 +534,16 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM)
setOperationAction(ISD::STACKSAVE, MVT::Other, Expand);
setOperationAction(ISD::STACKRESTORE, MVT::Other, Expand);
- setOperationAction(ISD::DYNAMIC_STACKALLOC,
- (Subtarget->is64Bit() ? MVT::i64 : MVT::i32),
- (Subtarget->isTargetCOFF()
- && !Subtarget->isTargetEnvMacho()
- ? Custom : Expand));
+
+ if (Subtarget->isTargetCOFF() && !Subtarget->isTargetEnvMacho())
+ setOperationAction(ISD::DYNAMIC_STACKALLOC, Subtarget->is64Bit() ?
+ MVT::i64 : MVT::i32, Custom);
+ else if (EnableSegmentedStacks)
+ setOperationAction(ISD::DYNAMIC_STACKALLOC, Subtarget->is64Bit() ?
+ MVT::i64 : MVT::i32, Custom);
+ else
+ setOperationAction(ISD::DYNAMIC_STACKALLOC, Subtarget->is64Bit() ?
+ MVT::i64 : MVT::i32, Expand);
if (!UseSoftFloat && X86ScalarSSEf64) {
// f32 and f64 use SSE.
@@ -739,7 +722,7 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM)
setOperationAction(ISD::ROTL, (MVT::SimpleValueType)VT, Expand);
setOperationAction(ISD::ROTR, (MVT::SimpleValueType)VT, Expand);
setOperationAction(ISD::BSWAP, (MVT::SimpleValueType)VT, Expand);
- setOperationAction(ISD::VSETCC, (MVT::SimpleValueType)VT, Expand);
+ setOperationAction(ISD::SETCC, (MVT::SimpleValueType)VT, Expand);
setOperationAction(ISD::FLOG, (MVT::SimpleValueType)VT, Expand);
setOperationAction(ISD::FLOG2, (MVT::SimpleValueType)VT, Expand);
setOperationAction(ISD::FLOG10, (MVT::SimpleValueType)VT, Expand);
@@ -754,6 +737,7 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM)
setOperationAction(ISD::SIGN_EXTEND, (MVT::SimpleValueType)VT, Expand);
setOperationAction(ISD::ZERO_EXTEND, (MVT::SimpleValueType)VT, Expand);
setOperationAction(ISD::ANY_EXTEND, (MVT::SimpleValueType)VT, Expand);
+ setOperationAction(ISD::VSELECT, (MVT::SimpleValueType)VT, Expand);
for (unsigned InnerVT = (unsigned)MVT::FIRST_VECTOR_VALUETYPE;
InnerVT <= (unsigned)MVT::LAST_VECTOR_VALUETYPE; ++InnerVT)
setTruncStoreAction((MVT::SimpleValueType)VT,
@@ -816,7 +800,7 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM)
setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v4f32, Custom);
setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v4f32, Custom);
setOperationAction(ISD::SELECT, MVT::v4f32, Custom);
- setOperationAction(ISD::VSETCC, MVT::v4f32, Custom);
+ setOperationAction(ISD::SETCC, MVT::v4f32, Custom);
}
if (!UseSoftFloat && Subtarget->hasXMMInt()) {
@@ -846,10 +830,10 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM)
setOperationAction(ISD::FSQRT, MVT::v2f64, Legal);
setOperationAction(ISD::FNEG, MVT::v2f64, Custom);
- setOperationAction(ISD::VSETCC, MVT::v2f64, Custom);
- setOperationAction(ISD::VSETCC, MVT::v16i8, Custom);
- setOperationAction(ISD::VSETCC, MVT::v8i16, Custom);
- setOperationAction(ISD::VSETCC, MVT::v4i32, Custom);
+ setOperationAction(ISD::SETCC, MVT::v2i64, Custom);
+ setOperationAction(ISD::SETCC, MVT::v16i8, Custom);
+ setOperationAction(ISD::SETCC, MVT::v8i16, Custom);
+ setOperationAction(ISD::SETCC, MVT::v4i32, Custom);
setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v16i8, Custom);
setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v8i16, Custom);
@@ -925,7 +909,7 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM)
setOperationAction(ISD::SINT_TO_FP, MVT::v4i32, Legal);
}
- if (Subtarget->hasSSE41()) {
+ if (Subtarget->hasSSE41() || Subtarget->hasAVX()) {
setOperationAction(ISD::FFLOOR, MVT::f32, Legal);
setOperationAction(ISD::FCEIL, MVT::f32, Legal);
setOperationAction(ISD::FTRUNC, MVT::f32, Legal);
@@ -944,6 +928,12 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM)
setOperationAction(ISD::SHL, MVT::v4i32, Custom);
setOperationAction(ISD::SHL, MVT::v16i8, Custom);
+ setOperationAction(ISD::VSELECT, MVT::v2f64, Legal);
+ setOperationAction(ISD::VSELECT, MVT::v2i64, Legal);
+ setOperationAction(ISD::VSELECT, MVT::v16i8, Legal);
+ setOperationAction(ISD::VSELECT, MVT::v4i32, Legal);
+ setOperationAction(ISD::VSELECT, MVT::v4f32, Legal);
+
// i8 and i16 vectors are custom , because the source register and source
// source memory operand types are not the same width. f32 vectors are
// custom since the immediate controlling the insert encodes additional
@@ -964,10 +954,11 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM)
}
}
- if (Subtarget->hasSSE2()) {
+ if (Subtarget->hasXMMInt()) {
setOperationAction(ISD::SRL, MVT::v2i64, Custom);
setOperationAction(ISD::SRL, MVT::v4i32, Custom);
setOperationAction(ISD::SRL, MVT::v16i8, Custom);
+ setOperationAction(ISD::SRL, MVT::v8i16, Custom);
setOperationAction(ISD::SHL, MVT::v2i64, Custom);
setOperationAction(ISD::SHL, MVT::v4i32, Custom);
@@ -977,15 +968,16 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM)
setOperationAction(ISD::SRA, MVT::v8i16, Custom);
}
- if (Subtarget->hasSSE42())
- setOperationAction(ISD::VSETCC, MVT::v2i64, Custom);
+ if (Subtarget->hasSSE42() || Subtarget->hasAVX())
+ setOperationAction(ISD::SETCC, MVT::v2i64, Custom);
if (!UseSoftFloat && Subtarget->hasAVX()) {
- addRegisterClass(MVT::v8f32, X86::VR256RegisterClass);
- addRegisterClass(MVT::v4f64, X86::VR256RegisterClass);
- addRegisterClass(MVT::v8i32, X86::VR256RegisterClass);
- addRegisterClass(MVT::v4i64, X86::VR256RegisterClass);
- addRegisterClass(MVT::v32i8, X86::VR256RegisterClass);
+ addRegisterClass(MVT::v32i8, X86::VR256RegisterClass);
+ addRegisterClass(MVT::v16i16, X86::VR256RegisterClass);
+ addRegisterClass(MVT::v8i32, X86::VR256RegisterClass);
+ addRegisterClass(MVT::v8f32, X86::VR256RegisterClass);
+ addRegisterClass(MVT::v4i64, X86::VR256RegisterClass);
+ addRegisterClass(MVT::v4f64, X86::VR256RegisterClass);
setOperationAction(ISD::LOAD, MVT::v8f32, Legal);
setOperationAction(ISD::LOAD, MVT::v4f64, Legal);
@@ -1005,6 +997,59 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM)
setOperationAction(ISD::FSQRT, MVT::v4f64, Legal);
setOperationAction(ISD::FNEG, MVT::v4f64, Custom);
+ setOperationAction(ISD::FP_TO_SINT, MVT::v8i32, Legal);
+ setOperationAction(ISD::SINT_TO_FP, MVT::v8i32, Legal);
+ setOperationAction(ISD::FP_ROUND, MVT::v4f32, Legal);
+
+ setOperationAction(ISD::CONCAT_VECTORS, MVT::v4f64, Custom);
+ setOperationAction(ISD::CONCAT_VECTORS, MVT::v4i64, Custom);
+ setOperationAction(ISD::CONCAT_VECTORS, MVT::v8f32, Custom);
+ setOperationAction(ISD::CONCAT_VECTORS, MVT::v8i32, Custom);
+ setOperationAction(ISD::CONCAT_VECTORS, MVT::v32i8, Custom);
+ setOperationAction(ISD::CONCAT_VECTORS, MVT::v16i16, Custom);
+
+ setOperationAction(ISD::SRL, MVT::v4i64, Custom);
+ setOperationAction(ISD::SRL, MVT::v8i32, Custom);
+ setOperationAction(ISD::SRL, MVT::v16i16, Custom);
+ setOperationAction(ISD::SRL, MVT::v32i8, Custom);
+
+ setOperationAction(ISD::SHL, MVT::v4i64, Custom);
+ setOperationAction(ISD::SHL, MVT::v8i32, Custom);
+ setOperationAction(ISD::SHL, MVT::v16i16, Custom);
+ setOperationAction(ISD::SHL, MVT::v32i8, Custom);
+
+ setOperationAction(ISD::SRA, MVT::v8i32, Custom);
+ setOperationAction(ISD::SRA, MVT::v16i16, Custom);
+
+ setOperationAction(ISD::SETCC, MVT::v32i8, Custom);
+ setOperationAction(ISD::SETCC, MVT::v16i16, Custom);
+ setOperationAction(ISD::SETCC, MVT::v8i32, Custom);
+ setOperationAction(ISD::SETCC, MVT::v4i64, Custom);
+
+ setOperationAction(ISD::SELECT, MVT::v4f64, Custom);
+ setOperationAction(ISD::SELECT, MVT::v4i64, Custom);
+ setOperationAction(ISD::SELECT, MVT::v8f32, Custom);
+
+ setOperationAction(ISD::VSELECT, MVT::v4f64, Legal);
+ setOperationAction(ISD::VSELECT, MVT::v4i64, Legal);
+ setOperationAction(ISD::VSELECT, MVT::v8i32, Legal);
+ setOperationAction(ISD::VSELECT, MVT::v8f32, Legal);
+
+ setOperationAction(ISD::ADD, MVT::v4i64, Custom);
+ setOperationAction(ISD::ADD, MVT::v8i32, Custom);
+ setOperationAction(ISD::ADD, MVT::v16i16, Custom);
+ setOperationAction(ISD::ADD, MVT::v32i8, Custom);
+
+ setOperationAction(ISD::SUB, MVT::v4i64, Custom);
+ setOperationAction(ISD::SUB, MVT::v8i32, Custom);
+ setOperationAction(ISD::SUB, MVT::v16i16, Custom);
+ setOperationAction(ISD::SUB, MVT::v32i8, Custom);
+
+ setOperationAction(ISD::MUL, MVT::v4i64, Custom);
+ setOperationAction(ISD::MUL, MVT::v8i32, Custom);
+ setOperationAction(ISD::MUL, MVT::v16i16, Custom);
+ // Don't lower v32i8 because there is no 128-bit byte mul
+
// Custom lower several nodes for 256-bit types.
for (unsigned i = (unsigned)MVT::FIRST_VECTOR_VALUETYPE;
i <= (unsigned)MVT::LAST_VECTOR_VALUETYPE; ++i) {
@@ -1093,6 +1138,7 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM)
setTargetDAGCombine(ISD::VECTOR_SHUFFLE);
setTargetDAGCombine(ISD::EXTRACT_VECTOR_ELT);
setTargetDAGCombine(ISD::BUILD_VECTOR);
+ setTargetDAGCombine(ISD::VSELECT);
setTargetDAGCombine(ISD::SELECT);
setTargetDAGCombine(ISD::SHL);
setTargetDAGCombine(ISD::SRA);
@@ -1100,7 +1146,10 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM)
setTargetDAGCombine(ISD::OR);
setTargetDAGCombine(ISD::AND);
setTargetDAGCombine(ISD::ADD);
+ setTargetDAGCombine(ISD::FADD);
+ setTargetDAGCombine(ISD::FSUB);
setTargetDAGCombine(ISD::SUB);
+ setTargetDAGCombine(ISD::LOAD);
setTargetDAGCombine(ISD::STORE);
setTargetDAGCombine(ISD::ZERO_EXTEND);
setTargetDAGCombine(ISD::SINT_TO_FP);
@@ -1124,25 +1173,26 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM)
}
-MVT::SimpleValueType X86TargetLowering::getSetCCResultType(EVT VT) const {
- return MVT::i8;
+EVT X86TargetLowering::getSetCCResultType(EVT VT) const {
+ if (!VT.isVector()) return MVT::i8;
+ return VT.changeVectorElementTypeToInteger();
}
/// getMaxByValAlign - Helper for getByValTypeAlignment to determine
/// the desired ByVal argument alignment.
-static void getMaxByValAlign(const Type *Ty, unsigned &MaxAlign) {
+static void getMaxByValAlign(Type *Ty, unsigned &MaxAlign) {
if (MaxAlign == 16)
return;
- if (const VectorType *VTy = dyn_cast<VectorType>(Ty)) {
+ if (VectorType *VTy = dyn_cast<VectorType>(Ty)) {
if (VTy->getBitWidth() == 128)
MaxAlign = 16;
- } else if (const ArrayType *ATy = dyn_cast<ArrayType>(Ty)) {
+ } else if (ArrayType *ATy = dyn_cast<ArrayType>(Ty)) {
unsigned EltAlign = 0;
getMaxByValAlign(ATy->getElementType(), EltAlign);
if (EltAlign > MaxAlign)
MaxAlign = EltAlign;
- } else if (const StructType *STy = dyn_cast<StructType>(Ty)) {
+ } else if (StructType *STy = dyn_cast<StructType>(Ty)) {
for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i) {
unsigned EltAlign = 0;
getMaxByValAlign(STy->getElementType(i), EltAlign);
@@ -1159,7 +1209,7 @@ static void getMaxByValAlign(const Type *Ty, unsigned &MaxAlign) {
/// function arguments in the caller parameter area. For X86, aggregates
/// that contain SSE vectors are placed at 16-byte boundaries while the rest
/// are at 4-byte boundaries.
-unsigned X86TargetLowering::getByValTypeAlignment(const Type *Ty) const {
+unsigned X86TargetLowering::getByValTypeAlignment(Type *Ty) const {
if (Subtarget->is64Bit()) {
// Max of 8 and alignment of type.
unsigned TyAlign = TD->getABITypeAlignment(Ty);
@@ -1203,9 +1253,12 @@ X86TargetLowering::getOptimalMemOpType(uint64_t Size,
((DstAlign == 0 || DstAlign >= 16) &&
(SrcAlign == 0 || SrcAlign >= 16))) &&
Subtarget->getStackAlignment() >= 16) {
- if (Subtarget->hasSSE2())
+ if (Subtarget->hasAVX() &&
+ Subtarget->getStackAlignment() >= 32)
+ return MVT::v8f32;
+ if (Subtarget->hasXMMInt())
return MVT::v4i32;
- if (Subtarget->hasSSE1())
+ if (Subtarget->hasXMM())
return MVT::v4f32;
} else if (!MemcpyStrSrc && Size >= 8 &&
!Subtarget->is64Bit() &&
@@ -1408,7 +1461,7 @@ X86TargetLowering::LowerReturn(SDValue Chain,
ValToCopy);
// If we don't have SSE2 available, convert to v4f32 so the generated
// register is legal.
- if (!Subtarget->hasSSE2())
+ if (!Subtarget->hasXMMInt())
ValToCopy = DAG.getNode(ISD::BITCAST, dl, MVT::v4f32,ValToCopy);
}
}
@@ -1700,6 +1753,7 @@ X86TargetLowering::LowerFormalArguments(SDValue Chain,
// places.
assert(VA.getValNo() != LastVal &&
"Don't support value assigned to multiple locs yet");
+ (void)LastVal;
LastVal = VA.getValNo();
if (VA.isRegLoc()) {
@@ -1917,6 +1971,8 @@ X86TargetLowering::LowerFormalArguments(SDValue Chain,
FuncInfo->setVarArgsFrameIndex(0xAAAAAAA);
}
+ FuncInfo->setArgumentStackSize(StackSize);
+
return Chain;
}
@@ -2744,8 +2800,6 @@ static bool isTargetShuffle(unsigned Opcode) {
case X86ISD::MOVSD:
case X86ISD::UNPCKLPS:
case X86ISD::UNPCKLPD:
- case X86ISD::VUNPCKLPS:
- case X86ISD::VUNPCKLPD:
case X86ISD::VUNPCKLPSY:
case X86ISD::VUNPCKLPDY:
case X86ISD::PUNPCKLWD:
@@ -2754,10 +2808,17 @@ static bool isTargetShuffle(unsigned Opcode) {
case X86ISD::PUNPCKLQDQ:
case X86ISD::UNPCKHPS:
case X86ISD::UNPCKHPD:
+ case X86ISD::VUNPCKHPSY:
+ case X86ISD::VUNPCKHPDY:
case X86ISD::PUNPCKHWD:
case X86ISD::PUNPCKHBW:
case X86ISD::PUNPCKHDQ:
case X86ISD::PUNPCKHQDQ:
+ case X86ISD::VPERMILPS:
+ case X86ISD::VPERMILPSY:
+ case X86ISD::VPERMILPD:
+ case X86ISD::VPERMILPDY:
+ case X86ISD::VPERM2F128:
return true;
}
return false;
@@ -2783,6 +2844,10 @@ static SDValue getTargetShuffleNode(unsigned Opc, DebugLoc dl, EVT VT,
case X86ISD::PSHUFD:
case X86ISD::PSHUFHW:
case X86ISD::PSHUFLW:
+ case X86ISD::VPERMILPS:
+ case X86ISD::VPERMILPSY:
+ case X86ISD::VPERMILPD:
+ case X86ISD::VPERMILPDY:
return DAG.getNode(Opc, dl, VT, V1, DAG.getConstant(TargetMask, MVT::i8));
}
@@ -2796,6 +2861,7 @@ static SDValue getTargetShuffleNode(unsigned Opc, DebugLoc dl, EVT VT,
case X86ISD::PALIGN:
case X86ISD::SHUFPD:
case X86ISD::SHUFPS:
+ case X86ISD::VPERM2F128:
return DAG.getNode(Opc, dl, VT, V1, V2,
DAG.getConstant(TargetMask, MVT::i8));
}
@@ -2815,8 +2881,6 @@ static SDValue getTargetShuffleNode(unsigned Opc, DebugLoc dl, EVT VT,
case X86ISD::MOVSD:
case X86ISD::UNPCKLPS:
case X86ISD::UNPCKLPD:
- case X86ISD::VUNPCKLPS:
- case X86ISD::VUNPCKLPD:
case X86ISD::VUNPCKLPSY:
case X86ISD::VUNPCKLPDY:
case X86ISD::PUNPCKLWD:
@@ -2825,6 +2889,8 @@ static SDValue getTargetShuffleNode(unsigned Opc, DebugLoc dl, EVT VT,
case X86ISD::PUNPCKLQDQ:
case X86ISD::UNPCKHPS:
case X86ISD::UNPCKHPD:
+ case X86ISD::VUNPCKHPSY:
+ case X86ISD::VUNPCKHPDY:
case X86ISD::PUNPCKHWD:
case X86ISD::PUNPCKHBW:
case X86ISD::PUNPCKHDQ:
@@ -3026,6 +3092,17 @@ static bool isUndefOrInRange(int Val, int Low, int Hi) {
return (Val < 0) || (Val >= Low && Val < Hi);
}
+/// isUndefOrInRange - Return true if every element in Mask, begining
+/// from position Pos and ending in Pos+Size, falls within the specified
+/// range (L, L+Pos]. or is undef.
+static bool isUndefOrInRange(const SmallVectorImpl<int> &Mask,
+ int Pos, int Size, int Low, int Hi) {
+ for (int i = Pos, e = Pos+Size; i != e; ++i)
+ if (!isUndefOrInRange(Mask[i], Low, Hi))
+ return false;
+ return true;
+}
+
/// isUndefOrEqual - Val is either less than zero (undef) or equal to the
/// specified value.
static bool isUndefOrEqual(int Val, int CmpVal) {
@@ -3034,6 +3111,17 @@ static bool isUndefOrEqual(int Val, int CmpVal) {
return false;
}
+/// isSequentialOrUndefInRange - Return true if every element in Mask, begining
+/// from position Pos and ending in Pos+Size, falls within the specified
+/// sequential range (L, L+Pos]. or is undef.
+static bool isSequentialOrUndefInRange(const SmallVectorImpl<int> &Mask,
+ int Pos, int Size, int Low) {
+ for (int i = Pos, e = Pos+Size; i != e; ++i, ++Low)
+ if (!isUndefOrEqual(Mask[i], Low))
+ return false;
+ return true;
+}
+
/// isPSHUFDMask - Return true if the node specifies a shuffle of elements that
/// is suitable for input to PSHUFD or PSHUFW. That is, it doesn't reference
/// the second operand.
@@ -3104,11 +3192,13 @@ bool X86::isPSHUFLWMask(ShuffleVectorSDNode *N) {
/// isPALIGNRMask - Return true if the node specifies a shuffle of elements that
/// is suitable for input to PALIGNR.
static bool isPALIGNRMask(const SmallVectorImpl<int> &Mask, EVT VT,
- bool hasSSSE3) {
+ bool hasSSSE3OrAVX) {
int i, e = VT.getVectorNumElements();
+ if (VT.getSizeInBits() != 128 && VT.getSizeInBits() != 64)
+ return false;
// Do not handle v2i64 / v2f64 shuffles with palignr.
- if (e < 4 || !hasSSSE3)
+ if (e < 4 || !hasSSSE3OrAVX)
return false;
for (i = 0; i != e; ++i)
@@ -3119,42 +3209,176 @@ static bool isPALIGNRMask(const SmallVectorImpl<int> &Mask, EVT VT,
if (i == e)
return false;
- // Determine if it's ok to perform a palignr with only the LHS, since we
- // don't have access to the actual shuffle elements to see if RHS is undef.
- bool Unary = Mask[i] < (int)e;
- bool NeedsUnary = false;
+ // Make sure we're shifting in the right direction.
+ if (Mask[i] <= i)
+ return false;
int s = Mask[i] - i;
// Check the rest of the elements to see if they are consecutive.
for (++i; i != e; ++i) {
int m = Mask[i];
- if (m < 0)
- continue;
+ if (m >= 0 && m != s+i)
+ return false;
+ }
+ return true;
+}
+
+/// isVSHUFPSYMask - Return true if the specified VECTOR_SHUFFLE operand
+/// specifies a shuffle of elements that is suitable for input to 256-bit
+/// VSHUFPSY.
+static bool isVSHUFPSYMask(const SmallVectorImpl<int> &Mask, EVT VT,
+ const X86Subtarget *Subtarget) {
+ int NumElems = VT.getVectorNumElements();
+
+ if (!Subtarget->hasAVX() || VT.getSizeInBits() != 256)
+ return false;
+
+ if (NumElems != 8)
+ return false;
- Unary = Unary && (m < (int)e);
- NeedsUnary = NeedsUnary || (m < s);
+ // VSHUFPSY divides the resulting vector into 4 chunks.
+ // The sources are also splitted into 4 chunks, and each destination
+ // chunk must come from a different source chunk.
+ //
+ // SRC1 => X7 X6 X5 X4 X3 X2 X1 X0
+ // SRC2 => Y7 Y6 Y5 Y4 Y3 Y2 Y1 Y9
+ //
+ // DST => Y7..Y4, Y7..Y4, X7..X4, X7..X4,
+ // Y3..Y0, Y3..Y0, X3..X0, X3..X0
+ //
+ int QuarterSize = NumElems/4;
+ int HalfSize = QuarterSize*2;
+ for (int i = 0; i < QuarterSize; ++i)
+ if (!isUndefOrInRange(Mask[i], 0, HalfSize))
+ return false;
+ for (int i = QuarterSize; i < QuarterSize*2; ++i)
+ if (!isUndefOrInRange(Mask[i], NumElems, NumElems+HalfSize))
+ return false;
- if (NeedsUnary && !Unary)
+ // The mask of the second half must be the same as the first but with
+ // the appropriate offsets. This works in the same way as VPERMILPS
+ // works with masks.
+ for (int i = QuarterSize*2; i < QuarterSize*3; ++i) {
+ if (!isUndefOrInRange(Mask[i], HalfSize, NumElems))
+ return false;
+ int FstHalfIdx = i-HalfSize;
+ if (Mask[FstHalfIdx] < 0)
+ continue;
+ if (!isUndefOrEqual(Mask[i], Mask[FstHalfIdx]+HalfSize))
return false;
- if (Unary && m != ((s+i) & (e-1)))
+ }
+ for (int i = QuarterSize*3; i < NumElems; ++i) {
+ if (!isUndefOrInRange(Mask[i], NumElems+HalfSize, NumElems*2))
return false;
- if (!Unary && m != (s+i))
+ int FstHalfIdx = i-HalfSize;
+ if (Mask[FstHalfIdx] < 0)
+ continue;
+ if (!isUndefOrEqual(Mask[i], Mask[FstHalfIdx]+HalfSize))
return false;
+
}
+
return true;
}
-bool X86::isPALIGNRMask(ShuffleVectorSDNode *N) {
- SmallVector<int, 8> M;
- N->getMask(M);
- return ::isPALIGNRMask(M, N->getValueType(0), true);
+/// getShuffleVSHUFPSYImmediate - Return the appropriate immediate to shuffle
+/// the specified VECTOR_MASK mask with VSHUFPSY instruction.
+static unsigned getShuffleVSHUFPSYImmediate(SDNode *N) {
+ ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(N);
+ EVT VT = SVOp->getValueType(0);
+ int NumElems = VT.getVectorNumElements();
+
+ assert(NumElems == 8 && VT.getSizeInBits() == 256 &&
+ "Only supports v8i32 and v8f32 types");
+
+ int HalfSize = NumElems/2;
+ unsigned Mask = 0;
+ for (int i = 0; i != NumElems ; ++i) {
+ if (SVOp->getMaskElt(i) < 0)
+ continue;
+ // The mask of the first half must be equal to the second one.
+ unsigned Shamt = (i%HalfSize)*2;
+ unsigned Elt = SVOp->getMaskElt(i) % HalfSize;
+ Mask |= Elt << Shamt;
+ }
+
+ return Mask;
+}
+
+/// isVSHUFPDYMask - Return true if the specified VECTOR_SHUFFLE operand
+/// specifies a shuffle of elements that is suitable for input to 256-bit
+/// VSHUFPDY. This shuffle doesn't have the same restriction as the PS
+/// version and the mask of the second half isn't binded with the first
+/// one.
+static bool isVSHUFPDYMask(const SmallVectorImpl<int> &Mask, EVT VT,
+ const X86Subtarget *Subtarget) {
+ int NumElems = VT.getVectorNumElements();
+
+ if (!Subtarget->hasAVX() || VT.getSizeInBits() != 256)
+ return false;
+
+ if (NumElems != 4)
+ return false;
+
+ // VSHUFPSY divides the resulting vector into 4 chunks.
+ // The sources are also splitted into 4 chunks, and each destination
+ // chunk must come from a different source chunk.
+ //
+ // SRC1 => X3 X2 X1 X0
+ // SRC2 => Y3 Y2 Y1 Y0
+ //
+ // DST => Y2..Y3, X2..X3, Y1..Y0, X1..X0
+ //
+ int QuarterSize = NumElems/4;
+ int HalfSize = QuarterSize*2;
+ for (int i = 0; i < QuarterSize; ++i)
+ if (!isUndefOrInRange(Mask[i], 0, HalfSize))
+ return false;
+ for (int i = QuarterSize; i < QuarterSize*2; ++i)
+ if (!isUndefOrInRange(Mask[i], NumElems, NumElems+HalfSize))
+ return false;
+ for (int i = QuarterSize*2; i < QuarterSize*3; ++i)
+ if (!isUndefOrInRange(Mask[i], HalfSize, NumElems))
+ return false;
+ for (int i = QuarterSize*3; i < NumElems; ++i)
+ if (!isUndefOrInRange(Mask[i], NumElems+HalfSize, NumElems*2))
+ return false;
+
+ return true;
+}
+
+/// getShuffleVSHUFPDYImmediate - Return the appropriate immediate to shuffle
+/// the specified VECTOR_MASK mask with VSHUFPDY instruction.
+static unsigned getShuffleVSHUFPDYImmediate(SDNode *N) {
+ ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(N);
+ EVT VT = SVOp->getValueType(0);
+ int NumElems = VT.getVectorNumElements();
+
+ assert(NumElems == 4 && VT.getSizeInBits() == 256 &&
+ "Only supports v4i64 and v4f64 types");
+
+ int HalfSize = NumElems/2;
+ unsigned Mask = 0;
+ for (int i = 0; i != NumElems ; ++i) {
+ if (SVOp->getMaskElt(i) < 0)
+ continue;
+ int Elt = SVOp->getMaskElt(i) % HalfSize;
+ Mask |= Elt << i;
+ }
+
+ return Mask;
}
/// isSHUFPMask - Return true if the specified VECTOR_SHUFFLE operand
-/// specifies a shuffle of elements that is suitable for input to SHUFP*.
+/// specifies a shuffle of elements that is suitable for input to 128-bit
+/// SHUFPS and SHUFPD.
static bool isSHUFPMask(const SmallVectorImpl<int> &Mask, EVT VT) {
int NumElems = VT.getVectorNumElements();
+
+ if (VT.getSizeInBits() != 128)
+ return false;
+
if (NumElems != 2 && NumElems != 4)
return false;
@@ -3204,7 +3428,13 @@ static bool isCommutedSHUFP(ShuffleVectorSDNode *N) {
/// isMOVHLPSMask - Return true if the specified VECTOR_SHUFFLE operand
/// specifies a shuffle of elements that is suitable for input to MOVHLPS.
bool X86::isMOVHLPSMask(ShuffleVectorSDNode *N) {
- if (N->getValueType(0).getVectorNumElements() != 4)
+ EVT VT = N->getValueType(0);
+ unsigned NumElems = VT.getVectorNumElements();
+
+ if (VT.getSizeInBits() != 128)
+ return false;
+
+ if (NumElems != 4)
return false;
// Expect bit0 == 6, bit1 == 7, bit2 == 2, bit3 == 3
@@ -3218,15 +3448,19 @@ bool X86::isMOVHLPSMask(ShuffleVectorSDNode *N) {
/// of vector_shuffle v, v, <2, 3, 2, 3>, i.e. vector_shuffle v, undef,
/// <2, 3, 2, 3>
bool X86::isMOVHLPS_v_undef_Mask(ShuffleVectorSDNode *N) {
- unsigned NumElems = N->getValueType(0).getVectorNumElements();
+ EVT VT = N->getValueType(0);
+ unsigned NumElems = VT.getVectorNumElements();
+
+ if (VT.getSizeInBits() != 128)
+ return false;
if (NumElems != 4)
return false;
return isUndefOrEqual(N->getMaskElt(0), 2) &&
- isUndefOrEqual(N->getMaskElt(1), 3) &&
- isUndefOrEqual(N->getMaskElt(2), 2) &&
- isUndefOrEqual(N->getMaskElt(3), 3);
+ isUndefOrEqual(N->getMaskElt(1), 3) &&
+ isUndefOrEqual(N->getMaskElt(2), 2) &&
+ isUndefOrEqual(N->getMaskElt(3), 3);
}
/// isMOVLPMask - Return true if the specified VECTOR_SHUFFLE operand
@@ -3273,20 +3507,22 @@ bool X86::isMOVLHPSMask(ShuffleVectorSDNode *N) {
static bool isUNPCKLMask(const SmallVectorImpl<int> &Mask, EVT VT,
bool V2IsSplat = false) {
int NumElts = VT.getVectorNumElements();
- if (NumElts != 2 && NumElts != 4 && NumElts != 8 && NumElts != 16)
+
+ assert((VT.is128BitVector() || VT.is256BitVector()) &&
+ "Unsupported vector type for unpckh");
+
+ if (VT.getSizeInBits() == 256 && NumElts != 4 && NumElts != 8)
return false;
- // Handle vector lengths > 128 bits. Define a "section" as a set of
- // 128 bits. AVX defines UNPCK* to operate independently on 128-bit
- // sections.
- unsigned NumSections = VT.getSizeInBits() / 128;
- if (NumSections == 0 ) NumSections = 1; // Handle MMX
- unsigned NumSectionElts = NumElts / NumSections;
+ // Handle 128 and 256-bit vector lengths. AVX defines UNPCK* to operate
+ // independently on 128-bit lanes.
+ unsigned NumLanes = VT.getSizeInBits()/128;
+ unsigned NumLaneElts = NumElts/NumLanes;
unsigned Start = 0;
- unsigned End = NumSectionElts;
- for (unsigned s = 0; s < NumSections; ++s) {
- for (unsigned i = Start, j = s * NumSectionElts;
+ unsigned End = NumLaneElts;
+ for (unsigned s = 0; s < NumLanes; ++s) {
+ for (unsigned i = Start, j = s * NumLaneElts;
i != End;
i += 2, ++j) {
int BitI = Mask[i];
@@ -3302,8 +3538,8 @@ static bool isUNPCKLMask(const SmallVectorImpl<int> &Mask, EVT VT,
}
}
// Process the next 128 bits.
- Start += NumSectionElts;
- End += NumSectionElts;
+ Start += NumLaneElts;
+ End += NumLaneElts;
}
return true;
@@ -3320,21 +3556,38 @@ bool X86::isUNPCKLMask(ShuffleVectorSDNode *N, bool V2IsSplat) {
static bool isUNPCKHMask(const SmallVectorImpl<int> &Mask, EVT VT,
bool V2IsSplat = false) {
int NumElts = VT.getVectorNumElements();
- if (NumElts != 2 && NumElts != 4 && NumElts != 8 && NumElts != 16)
+
+ assert((VT.is128BitVector() || VT.is256BitVector()) &&
+ "Unsupported vector type for unpckh");
+
+ if (VT.getSizeInBits() == 256 && NumElts != 4 && NumElts != 8)
return false;
- for (int i = 0, j = 0; i != NumElts; i += 2, ++j) {
- int BitI = Mask[i];
- int BitI1 = Mask[i+1];
- if (!isUndefOrEqual(BitI, j + NumElts/2))
- return false;
- if (V2IsSplat) {
- if (isUndefOrEqual(BitI1, NumElts))
- return false;
- } else {
- if (!isUndefOrEqual(BitI1, j + NumElts/2 + NumElts))
+ // Handle 128 and 256-bit vector lengths. AVX defines UNPCK* to operate
+ // independently on 128-bit lanes.
+ unsigned NumLanes = VT.getSizeInBits()/128;
+ unsigned NumLaneElts = NumElts/NumLanes;
+
+ unsigned Start = 0;
+ unsigned End = NumLaneElts;
+ for (unsigned l = 0; l != NumLanes; ++l) {
+ for (unsigned i = Start, j = (l*NumLaneElts)+NumLaneElts/2;
+ i != End; i += 2, ++j) {
+ int BitI = Mask[i];
+ int BitI1 = Mask[i+1];
+ if (!isUndefOrEqual(BitI, j))
return false;
+ if (V2IsSplat) {
+ if (isUndefOrEqual(BitI1, NumElts))
+ return false;
+ } else {
+ if (!isUndefOrEqual(BitI1, j+NumElts))
+ return false;
+ }
}
+ // Process the next 128 bits.
+ Start += NumLaneElts;
+ End += NumLaneElts;
}
return true;
}
@@ -3353,16 +3606,21 @@ static bool isUNPCKL_v_undef_Mask(const SmallVectorImpl<int> &Mask, EVT VT) {
if (NumElems != 2 && NumElems != 4 && NumElems != 8 && NumElems != 16)
return false;
- // Handle vector lengths > 128 bits. Define a "section" as a set of
- // 128 bits. AVX defines UNPCK* to operate independently on 128-bit
- // sections.
- unsigned NumSections = VT.getSizeInBits() / 128;
- if (NumSections == 0 ) NumSections = 1; // Handle MMX
- unsigned NumSectionElts = NumElems / NumSections;
+ // For 256-bit i64/f64, use MOVDDUPY instead, so reject the matching pattern
+ // FIXME: Need a better way to get rid of this, there's no latency difference
+ // between UNPCKLPD and MOVDDUP, the later should always be checked first and
+ // the former later. We should also remove the "_undef" special mask.
+ if (NumElems == 4 && VT.getSizeInBits() == 256)
+ return false;
+
+ // Handle 128 and 256-bit vector lengths. AVX defines UNPCK* to operate
+ // independently on 128-bit lanes.
+ unsigned NumLanes = VT.getSizeInBits() / 128;
+ unsigned NumLaneElts = NumElems / NumLanes;
- for (unsigned s = 0; s < NumSections; ++s) {
- for (unsigned i = s * NumSectionElts, j = s * NumSectionElts;
- i != NumSectionElts * (s + 1);
+ for (unsigned s = 0; s < NumLanes; ++s) {
+ for (unsigned i = s * NumLaneElts, j = s * NumLaneElts;
+ i != NumLaneElts * (s + 1);
i += 2, ++j) {
int BitI = Mask[i];
int BitI1 = Mask[i+1];
@@ -3433,6 +3691,189 @@ bool X86::isMOVLMask(ShuffleVectorSDNode *N) {
return ::isMOVLMask(M, N->getValueType(0));
}
+/// isVPERM2F128Mask - Match 256-bit shuffles where the elements are considered
+/// as permutations between 128-bit chunks or halves. As an example: this
+/// shuffle bellow:
+/// vector_shuffle <4, 5, 6, 7, 12, 13, 14, 15>
+/// The first half comes from the second half of V1 and the second half from the
+/// the second half of V2.
+static bool isVPERM2F128Mask(const SmallVectorImpl<int> &Mask, EVT VT,
+ const X86Subtarget *Subtarget) {
+ if (!Subtarget->hasAVX() || VT.getSizeInBits() != 256)
+ return false;
+
+ // The shuffle result is divided into half A and half B. In total the two
+ // sources have 4 halves, namely: C, D, E, F. The final values of A and
+ // B must come from C, D, E or F.
+ int HalfSize = VT.getVectorNumElements()/2;
+ bool MatchA = false, MatchB = false;
+
+ // Check if A comes from one of C, D, E, F.
+ for (int Half = 0; Half < 4; ++Half) {
+ if (isSequentialOrUndefInRange(Mask, 0, HalfSize, Half*HalfSize)) {
+ MatchA = true;
+ break;
+ }
+ }
+
+ // Check if B comes from one of C, D, E, F.
+ for (int Half = 0; Half < 4; ++Half) {
+ if (isSequentialOrUndefInRange(Mask, HalfSize, HalfSize, Half*HalfSize)) {
+ MatchB = true;
+ break;
+ }
+ }
+
+ return MatchA && MatchB;
+}
+
+/// getShuffleVPERM2F128Immediate - Return the appropriate immediate to shuffle
+/// the specified VECTOR_MASK mask with VPERM2F128 instructions.
+static unsigned getShuffleVPERM2F128Immediate(SDNode *N) {
+ ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(N);
+ EVT VT = SVOp->getValueType(0);
+
+ int HalfSize = VT.getVectorNumElements()/2;
+
+ int FstHalf = 0, SndHalf = 0;
+ for (int i = 0; i < HalfSize; ++i) {
+ if (SVOp->getMaskElt(i) > 0) {
+ FstHalf = SVOp->getMaskElt(i)/HalfSize;
+ break;
+ }
+ }
+ for (int i = HalfSize; i < HalfSize*2; ++i) {
+ if (SVOp->getMaskElt(i) > 0) {
+ SndHalf = SVOp->getMaskElt(i)/HalfSize;
+ break;
+ }
+ }
+
+ return (FstHalf | (SndHalf << 4));
+}
+
+/// isVPERMILPDMask - Return true if the specified VECTOR_SHUFFLE operand
+/// specifies a shuffle of elements that is suitable for input to VPERMILPD*.
+/// Note that VPERMIL mask matching is different depending whether theunderlying
+/// type is 32 or 64. In the VPERMILPS the high half of the mask should point
+/// to the same elements of the low, but to the higher half of the source.
+/// In VPERMILPD the two lanes could be shuffled independently of each other
+/// with the same restriction that lanes can't be crossed.
+static bool isVPERMILPDMask(const SmallVectorImpl<int> &Mask, EVT VT,
+ const X86Subtarget *Subtarget) {
+ int NumElts = VT.getVectorNumElements();
+ int NumLanes = VT.getSizeInBits()/128;
+
+ if (!Subtarget->hasAVX())
+ return false;
+
+ // Only match 256-bit with 64-bit types
+ if (VT.getSizeInBits() != 256 || NumElts != 4)
+ return false;
+
+ // The mask on the high lane is independent of the low. Both can match
+ // any element in inside its own lane, but can't cross.
+ int LaneSize = NumElts/NumLanes;
+ for (int l = 0; l < NumLanes; ++l)
+ for (int i = l*LaneSize; i < LaneSize*(l+1); ++i) {
+ int LaneStart = l*LaneSize;
+ if (!isUndefOrInRange(Mask[i], LaneStart, LaneStart+LaneSize))
+ return false;
+ }
+
+ return true;
+}
+
+/// isVPERMILPSMask - Return true if the specified VECTOR_SHUFFLE operand
+/// specifies a shuffle of elements that is suitable for input to VPERMILPS*.
+/// Note that VPERMIL mask matching is different depending whether theunderlying
+/// type is 32 or 64. In the VPERMILPS the high half of the mask should point
+/// to the same elements of the low, but to the higher half of the source.
+/// In VPERMILPD the two lanes could be shuffled independently of each other
+/// with the same restriction that lanes can't be crossed.
+static bool isVPERMILPSMask(const SmallVectorImpl<int> &Mask, EVT VT,
+ const X86Subtarget *Subtarget) {
+ unsigned NumElts = VT.getVectorNumElements();
+ unsigned NumLanes = VT.getSizeInBits()/128;
+
+ if (!Subtarget->hasAVX())
+ return false;
+
+ // Only match 256-bit with 32-bit types
+ if (VT.getSizeInBits() != 256 || NumElts != 8)
+ return false;
+
+ // The mask on the high lane should be the same as the low. Actually,
+ // they can differ if any of the corresponding index in a lane is undef
+ // and the other stays in range.
+ int LaneSize = NumElts/NumLanes;
+ for (int i = 0; i < LaneSize; ++i) {
+ int HighElt = i+LaneSize;
+ bool HighValid = isUndefOrInRange(Mask[HighElt], LaneSize, NumElts);
+ bool LowValid = isUndefOrInRange(Mask[i], 0, LaneSize);
+
+ if (!HighValid || !LowValid)
+ return false;
+ if (Mask[i] < 0 || Mask[HighElt] < 0)
+ continue;
+ if (Mask[HighElt]-Mask[i] != LaneSize)
+ return false;
+ }
+
+ return true;
+}
+
+/// getShuffleVPERMILPSImmediate - Return the appropriate immediate to shuffle
+/// the specified VECTOR_MASK mask with VPERMILPS* instructions.
+static unsigned getShuffleVPERMILPSImmediate(SDNode *N) {
+ ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(N);
+ EVT VT = SVOp->getValueType(0);
+
+ int NumElts = VT.getVectorNumElements();
+ int NumLanes = VT.getSizeInBits()/128;
+ int LaneSize = NumElts/NumLanes;
+
+ // Although the mask is equal for both lanes do it twice to get the cases
+ // where a mask will match because the same mask element is undef on the
+ // first half but valid on the second. This would get pathological cases
+ // such as: shuffle <u, 0, 1, 2, 4, 4, 5, 6>, which is completely valid.
+ unsigned Mask = 0;
+ for (int l = 0; l < NumLanes; ++l) {
+ for (int i = 0; i < LaneSize; ++i) {
+ int MaskElt = SVOp->getMaskElt(i+(l*LaneSize));
+ if (MaskElt < 0)
+ continue;
+ if (MaskElt >= LaneSize)
+ MaskElt -= LaneSize;
+ Mask |= MaskElt << (i*2);
+ }
+ }
+
+ return Mask;
+}
+
+/// getShuffleVPERMILPDImmediate - Return the appropriate immediate to shuffle
+/// the specified VECTOR_MASK mask with VPERMILPD* instructions.
+static unsigned getShuffleVPERMILPDImmediate(SDNode *N) {
+ ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(N);
+ EVT VT = SVOp->getValueType(0);
+
+ int NumElts = VT.getVectorNumElements();
+ int NumLanes = VT.getSizeInBits()/128;
+
+ unsigned Mask = 0;
+ int LaneSize = NumElts/NumLanes;
+ for (int l = 0; l < NumLanes; ++l)
+ for (int i = l*LaneSize; i < LaneSize*(l+1); ++i) {
+ int MaskElt = SVOp->getMaskElt(i);
+ if (MaskElt < 0)
+ continue;
+ Mask |= (MaskElt-l*LaneSize) << i;
+ }
+
+ return Mask;
+}
+
/// isCommutedMOVL - Returns true if the shuffle mask is except the reverse
/// of what x86 movss want. X86 movs requires the lowest element to be lowest
/// element of vector 2 and the other elements to come from vector 1 in order.
@@ -3463,58 +3904,92 @@ static bool isCommutedMOVL(ShuffleVectorSDNode *N, bool V2IsSplat = false,
/// isMOVSHDUPMask - Return true if the specified VECTOR_SHUFFLE operand
/// specifies a shuffle of elements that is suitable for input to MOVSHDUP.
-bool X86::isMOVSHDUPMask(ShuffleVectorSDNode *N) {
- if (N->getValueType(0).getVectorNumElements() != 4)
+/// Masks to match: <1, 1, 3, 3> or <1, 1, 3, 3, 5, 5, 7, 7>
+bool X86::isMOVSHDUPMask(ShuffleVectorSDNode *N,
+ const X86Subtarget *Subtarget) {
+ if (!Subtarget->hasSSE3() && !Subtarget->hasAVX())
return false;
- // Expect 1, 1, 3, 3
- for (unsigned i = 0; i < 2; ++i) {
- int Elt = N->getMaskElt(i);
- if (Elt >= 0 && Elt != 1)
- return false;
- }
+ // The second vector must be undef
+ if (N->getOperand(1).getOpcode() != ISD::UNDEF)
+ return false;
- bool HasHi = false;
- for (unsigned i = 2; i < 4; ++i) {
- int Elt = N->getMaskElt(i);
- if (Elt >= 0 && Elt != 3)
+ EVT VT = N->getValueType(0);
+ unsigned NumElems = VT.getVectorNumElements();
+
+ if ((VT.getSizeInBits() == 128 && NumElems != 4) ||
+ (VT.getSizeInBits() == 256 && NumElems != 8))
+ return false;
+
+ // "i+1" is the value the indexed mask element must have
+ for (unsigned i = 0; i < NumElems; i += 2)
+ if (!isUndefOrEqual(N->getMaskElt(i), i+1) ||
+ !isUndefOrEqual(N->getMaskElt(i+1), i+1))
return false;
- if (Elt == 3)
- HasHi = true;
- }
- // Don't use movshdup if it can be done with a shufps.
- // FIXME: verify that matching u, u, 3, 3 is what we want.
- return HasHi;
+
+ return true;
}
/// isMOVSLDUPMask - Return true if the specified VECTOR_SHUFFLE operand
/// specifies a shuffle of elements that is suitable for input to MOVSLDUP.
-bool X86::isMOVSLDUPMask(ShuffleVectorSDNode *N) {
- if (N->getValueType(0).getVectorNumElements() != 4)
+/// Masks to match: <0, 0, 2, 2> or <0, 0, 2, 2, 4, 4, 6, 6>
+bool X86::isMOVSLDUPMask(ShuffleVectorSDNode *N,
+ const X86Subtarget *Subtarget) {
+ if (!Subtarget->hasSSE3() && !Subtarget->hasAVX())
+ return false;
+
+ // The second vector must be undef
+ if (N->getOperand(1).getOpcode() != ISD::UNDEF)
+ return false;
+
+ EVT VT = N->getValueType(0);
+ unsigned NumElems = VT.getVectorNumElements();
+
+ if ((VT.getSizeInBits() == 128 && NumElems != 4) ||
+ (VT.getSizeInBits() == 256 && NumElems != 8))
return false;
- // Expect 0, 0, 2, 2
- for (unsigned i = 0; i < 2; ++i)
- if (N->getMaskElt(i) > 0)
+ // "i" is the value the indexed mask element must have
+ for (unsigned i = 0; i < NumElems; i += 2)
+ if (!isUndefOrEqual(N->getMaskElt(i), i) ||
+ !isUndefOrEqual(N->getMaskElt(i+1), i))
return false;
- bool HasHi = false;
- for (unsigned i = 2; i < 4; ++i) {
- int Elt = N->getMaskElt(i);
- if (Elt >= 0 && Elt != 2)
+ return true;
+}
+
+/// isMOVDDUPYMask - Return true if the specified VECTOR_SHUFFLE operand
+/// specifies a shuffle of elements that is suitable for input to 256-bit
+/// version of MOVDDUP.
+static bool isMOVDDUPYMask(ShuffleVectorSDNode *N,
+ const X86Subtarget *Subtarget) {
+ EVT VT = N->getValueType(0);
+ int NumElts = VT.getVectorNumElements();
+ bool V2IsUndef = N->getOperand(1).getOpcode() == ISD::UNDEF;
+
+ if (!Subtarget->hasAVX() || VT.getSizeInBits() != 256 ||
+ !V2IsUndef || NumElts != 4)
+ return false;
+
+ for (int i = 0; i != NumElts/2; ++i)
+ if (!isUndefOrEqual(N->getMaskElt(i), 0))
return false;
- if (Elt == 2)
- HasHi = true;
- }
- // Don't use movsldup if it can be done with a shufps.
- return HasHi;
+ for (int i = NumElts/2; i != NumElts; ++i)
+ if (!isUndefOrEqual(N->getMaskElt(i), NumElts/2))
+ return false;
+ return true;
}
/// isMOVDDUPMask - Return true if the specified VECTOR_SHUFFLE operand
-/// specifies a shuffle of elements that is suitable for input to MOVDDUP.
+/// specifies a shuffle of elements that is suitable for input to 128-bit
+/// version of MOVDDUP.
bool X86::isMOVDDUPMask(ShuffleVectorSDNode *N) {
- int e = N->getValueType(0).getVectorNumElements() / 2;
+ EVT VT = N->getValueType(0);
+
+ if (VT.getSizeInBits() != 128)
+ return false;
+ int e = VT.getVectorNumElements() / 2;
for (int i = 0; i < e; ++i)
if (!isUndefOrEqual(N->getMaskElt(i), i))
return false;
@@ -3627,6 +4102,7 @@ unsigned X86::getShufflePALIGNRImmediate(SDNode *N) {
if (Val >= 0)
break;
}
+ assert(Val - i > 0 && "PALIGNR imm should be positive");
return (Val - i) * EltSize;
}
@@ -3644,7 +4120,6 @@ unsigned X86::getExtractVEXTRACTF128Immediate(SDNode *N) {
EVT ElVT = VecVT.getVectorElementType();
unsigned NumElemsPerChunk = 128 / ElVT.getSizeInBits();
-
return Index / NumElemsPerChunk;
}
@@ -3662,7 +4137,6 @@ unsigned X86::getInsertVINSERTF128Immediate(SDNode *N) {
EVT ElVT = VecVT.getVectorElementType();
unsigned NumElemsPerChunk = 128 / ElVT.getSizeInBits();
-
return Index / NumElemsPerChunk;
}
@@ -3716,7 +4190,10 @@ static void CommuteVectorShuffleMask(SmallVectorImpl<int> &Mask, EVT VT) {
/// V1 (and in order), and the upper half elements should come from the upper
/// half of V2 (and in order).
static bool ShouldXformToMOVHLPS(ShuffleVectorSDNode *Op) {
- if (Op->getValueType(0).getVectorNumElements() != 4)
+ EVT VT = Op->getValueType(0);
+ if (VT.getSizeInBits() != 128)
+ return false;
+ if (VT.getVectorNumElements() != 4)
return false;
for (unsigned i = 0, e = 2; i != e; ++i)
if (!isUndefOrEqual(Op->getMaskElt(i), i+2))
@@ -3748,6 +4225,10 @@ static bool isScalarLoadToVector(SDNode *N, LoadSDNode **LD = NULL) {
/// MOVLP, it must be either a vector load or a scalar load to vector.
static bool ShouldXformToMOVLP(SDNode *V1, SDNode *V2,
ShuffleVectorSDNode *Op) {
+ EVT VT = Op->getValueType(0);
+ if (VT.getSizeInBits() != 128)
+ return false;
+
if (!ISD::isNON_EXTLoad(V1) && !isScalarLoadToVector(V1))
return false;
// Is V2 is a vector load, don't do this transformation. We will try to use
@@ -3755,7 +4236,7 @@ static bool ShouldXformToMOVLP(SDNode *V1, SDNode *V2,
if (ISD::isNON_EXTLoad(V2))
return false;
- unsigned NumElems = Op->getValueType(0).getVectorNumElements();
+ unsigned NumElems = VT.getVectorNumElements();
if (NumElems != 2 && NumElems != 4)
return false;
@@ -3811,7 +4292,7 @@ static bool isZeroShuffle(ShuffleVectorSDNode *N) {
/// getZeroVector - Returns a vector of specified type with all zero elements.
///
-static SDValue getZeroVector(EVT VT, bool HasSSE2, SelectionDAG &DAG,
+static SDValue getZeroVector(EVT VT, bool HasXMMInt, SelectionDAG &DAG,
DebugLoc dl) {
assert(VT.isVector() && "Expected a vector type");
@@ -3819,7 +4300,7 @@ static SDValue getZeroVector(EVT VT, bool HasSSE2, SelectionDAG &DAG,
// to their dest type. This ensures they get CSE'd.
SDValue Vec;
if (VT.getSizeInBits() == 128) { // SSE
- if (HasSSE2) { // SSE2
+ if (HasXMMInt) { // SSE2
SDValue Cst = DAG.getTargetConstant(0, MVT::i32);
Vec = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32, Cst, Cst, Cst, Cst);
} else { // SSE1
@@ -3838,21 +4319,25 @@ static SDValue getZeroVector(EVT VT, bool HasSSE2, SelectionDAG &DAG,
}
/// getOnesVector - Returns a vector of specified type with all bits set.
-/// Always build ones vectors as <4 x i32> or <8 x i32> bitcasted to
-/// their original type, ensuring they get CSE'd.
+/// Always build ones vectors as <4 x i32>. For 256-bit types, use two
+/// <4 x i32> inserted in a <8 x i32> appropriately. Then bitcast to their
+/// original type, ensuring they get CSE'd.
static SDValue getOnesVector(EVT VT, SelectionDAG &DAG, DebugLoc dl) {
assert(VT.isVector() && "Expected a vector type");
assert((VT.is128BitVector() || VT.is256BitVector())
&& "Expected a 128-bit or 256-bit vector type");
SDValue Cst = DAG.getTargetConstant(~0U, MVT::i32);
+ SDValue Vec = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32,
+ Cst, Cst, Cst, Cst);
- SDValue Vec;
if (VT.is256BitVector()) {
- SDValue Ops[] = { Cst, Cst, Cst, Cst, Cst, Cst, Cst, Cst };
- Vec = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v8i32, Ops, 8);
- } else
- Vec = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32, Cst, Cst, Cst, Cst);
+ SDValue InsV = Insert128BitVector(DAG.getNode(ISD::UNDEF, dl, MVT::v8i32),
+ Vec, DAG.getConstant(0, MVT::i32), DAG, dl);
+ Vec = Insert128BitVector(InsV, Vec,
+ DAG.getConstant(4 /* NumElems/2 */, MVT::i32), DAG, dl);
+ }
+
return DAG.getNode(ISD::BITCAST, dl, VT, Vec);
}
@@ -3902,7 +4387,7 @@ static SDValue getUnpackl(SelectionDAG &DAG, DebugLoc dl, EVT VT, SDValue V1,
return DAG.getVectorShuffle(VT, dl, V1, V2, &Mask[0]);
}
-/// getUnpackhMask - Returns a vector_shuffle node for an unpackh operation.
+/// getUnpackh - Returns a vector_shuffle node for an unpackh operation.
static SDValue getUnpackh(SelectionDAG &DAG, DebugLoc dl, EVT VT, SDValue V1,
SDValue V2) {
unsigned NumElems = VT.getVectorNumElements();
@@ -3915,31 +4400,95 @@ static SDValue getUnpackh(SelectionDAG &DAG, DebugLoc dl, EVT VT, SDValue V1,
return DAG.getVectorShuffle(VT, dl, V1, V2, &Mask[0]);
}
-/// PromoteSplat - Promote a splat of v4i32, v8i16 or v16i8 to v4f32.
-static SDValue PromoteSplat(ShuffleVectorSDNode *SV, SelectionDAG &DAG) {
- EVT PVT = MVT::v4f32;
- EVT VT = SV->getValueType(0);
- DebugLoc dl = SV->getDebugLoc();
- SDValue V1 = SV->getOperand(0);
+// PromoteSplati8i16 - All i16 and i8 vector types can't be used directly by
+// a generic shuffle instruction because the target has no such instructions.
+// Generate shuffles which repeat i16 and i8 several times until they can be
+// represented by v4f32 and then be manipulated by target suported shuffles.
+static SDValue PromoteSplati8i16(SDValue V, SelectionDAG &DAG, int &EltNo) {
+ EVT VT = V.getValueType();
int NumElems = VT.getVectorNumElements();
- int EltNo = SV->getSplatIndex();
+ DebugLoc dl = V.getDebugLoc();
- // unpack elements to the correct location
while (NumElems > 4) {
if (EltNo < NumElems/2) {
- V1 = getUnpackl(DAG, dl, VT, V1, V1);
+ V = getUnpackl(DAG, dl, VT, V, V);
} else {
- V1 = getUnpackh(DAG, dl, VT, V1, V1);
+ V = getUnpackh(DAG, dl, VT, V, V);
EltNo -= NumElems/2;
}
NumElems >>= 1;
}
+ return V;
+}
+
+/// getLegalSplat - Generate a legal splat with supported x86 shuffles
+static SDValue getLegalSplat(SelectionDAG &DAG, SDValue V, int EltNo) {
+ EVT VT = V.getValueType();
+ DebugLoc dl = V.getDebugLoc();
+ assert((VT.getSizeInBits() == 128 || VT.getSizeInBits() == 256)
+ && "Vector size not supported");
+
+ if (VT.getSizeInBits() == 128) {
+ V = DAG.getNode(ISD::BITCAST, dl, MVT::v4f32, V);
+ int SplatMask[4] = { EltNo, EltNo, EltNo, EltNo };
+ V = DAG.getVectorShuffle(MVT::v4f32, dl, V, DAG.getUNDEF(MVT::v4f32),
+ &SplatMask[0]);
+ } else {
+ // To use VPERMILPS to splat scalars, the second half of indicies must
+ // refer to the higher part, which is a duplication of the lower one,
+ // because VPERMILPS can only handle in-lane permutations.
+ int SplatMask[8] = { EltNo, EltNo, EltNo, EltNo,
+ EltNo+4, EltNo+4, EltNo+4, EltNo+4 };
+
+ V = DAG.getNode(ISD::BITCAST, dl, MVT::v8f32, V);
+ V = DAG.getVectorShuffle(MVT::v8f32, dl, V, DAG.getUNDEF(MVT::v8f32),
+ &SplatMask[0]);
+ }
+
+ return DAG.getNode(ISD::BITCAST, dl, VT, V);
+}
+
+/// PromoteSplat - Splat is promoted to target supported vector shuffles.
+static SDValue PromoteSplat(ShuffleVectorSDNode *SV, SelectionDAG &DAG) {
+ EVT SrcVT = SV->getValueType(0);
+ SDValue V1 = SV->getOperand(0);
+ DebugLoc dl = SV->getDebugLoc();
+
+ int EltNo = SV->getSplatIndex();
+ int NumElems = SrcVT.getVectorNumElements();
+ unsigned Size = SrcVT.getSizeInBits();
+
+ assert(((Size == 128 && NumElems > 4) || Size == 256) &&
+ "Unknown how to promote splat for type");
+
+ // Extract the 128-bit part containing the splat element and update
+ // the splat element index when it refers to the higher register.
+ if (Size == 256) {
+ unsigned Idx = (EltNo > NumElems/2) ? NumElems/2 : 0;
+ V1 = Extract128BitVector(V1, DAG.getConstant(Idx, MVT::i32), DAG, dl);
+ if (Idx > 0)
+ EltNo -= NumElems/2;
+ }
+
+ // All i16 and i8 vector types can't be used directly by a generic shuffle
+ // instruction because the target has no such instruction. Generate shuffles
+ // which repeat i16 and i8 several times until they fit in i32, and then can
+ // be manipulated by target suported shuffles.
+ EVT EltVT = SrcVT.getVectorElementType();
+ if (EltVT == MVT::i8 || EltVT == MVT::i16)
+ V1 = PromoteSplati8i16(V1, DAG, EltNo);
+
+ // Recreate the 256-bit vector and place the same 128-bit vector
+ // into the low and high part. This is necessary because we want
+ // to use VPERM* to shuffle the vectors
+ if (Size == 256) {
+ SDValue InsV = Insert128BitVector(DAG.getUNDEF(SrcVT), V1,
+ DAG.getConstant(0, MVT::i32), DAG, dl);
+ V1 = Insert128BitVector(InsV, V1,
+ DAG.getConstant(NumElems/2, MVT::i32), DAG, dl);
+ }
- // Perform the splat.
- int SplatMask[4] = { EltNo, EltNo, EltNo, EltNo };
- V1 = DAG.getNode(ISD::BITCAST, dl, PVT, V1);
- V1 = DAG.getVectorShuffle(PVT, dl, V1, DAG.getUNDEF(PVT), &SplatMask[0]);
- return DAG.getNode(ISD::BITCAST, dl, VT, V1);
+ return getLegalSplat(DAG, V1, EltNo);
}
/// getShuffleVectorZeroOrUndef - Return a vector_shuffle of the specified
@@ -3947,11 +4496,11 @@ static SDValue PromoteSplat(ShuffleVectorSDNode *SV, SelectionDAG &DAG) {
/// element of V2 is swizzled into the zero/undef vector, landing at element
/// Idx. This produces a shuffle mask like 4,1,2,3 (idx=0) or 0,1,2,4 (idx=3).
static SDValue getShuffleVectorZeroOrUndef(SDValue V2, unsigned Idx,
- bool isZero, bool HasSSE2,
- SelectionDAG &DAG) {
+ bool isZero, bool HasXMMInt,
+ SelectionDAG &DAG) {
EVT VT = V2.getValueType();
SDValue V1 = isZero
- ? getZeroVector(VT, HasSSE2, DAG, V2.getDebugLoc()) : DAG.getUNDEF(VT);
+ ? getZeroVector(VT, HasXMMInt, DAG, V2.getDebugLoc()) : DAG.getUNDEF(VT);
unsigned NumElems = VT.getVectorNumElements();
SmallVector<int, 16> MaskVec;
for (unsigned i = 0; i != NumElems; ++i)
@@ -4005,6 +4554,8 @@ static SDValue getShuffleScalarElt(SDNode *N, int Index, SelectionDAG &DAG,
break;
case X86ISD::UNPCKHPS:
case X86ISD::UNPCKHPD:
+ case X86ISD::VUNPCKHPSY:
+ case X86ISD::VUNPCKHPDY:
DecodeUNPCKHPMask(NumElems, ShuffleMask);
break;
case X86ISD::PUNPCKLBW:
@@ -4015,8 +4566,6 @@ static SDValue getShuffleScalarElt(SDNode *N, int Index, SelectionDAG &DAG,
break;
case X86ISD::UNPCKLPS:
case X86ISD::UNPCKLPD:
- case X86ISD::VUNPCKLPS:
- case X86ISD::VUNPCKLPD:
case X86ISD::VUNPCKLPSY:
case X86ISD::VUNPCKLPDY:
DecodeUNPCKLPMask(VT, ShuffleMask);
@@ -4052,8 +4601,41 @@ static SDValue getShuffleScalarElt(SDNode *N, int Index, SelectionDAG &DAG,
return getShuffleScalarElt(V.getOperand(OpNum).getNode(), Index, DAG,
Depth+1);
}
+ case X86ISD::VPERMILPS:
+ ImmN = N->getOperand(N->getNumOperands()-1);
+ DecodeVPERMILPSMask(4, cast<ConstantSDNode>(ImmN)->getZExtValue(),
+ ShuffleMask);
+ break;
+ case X86ISD::VPERMILPSY:
+ ImmN = N->getOperand(N->getNumOperands()-1);
+ DecodeVPERMILPSMask(8, cast<ConstantSDNode>(ImmN)->getZExtValue(),
+ ShuffleMask);
+ break;
+ case X86ISD::VPERMILPD:
+ ImmN = N->getOperand(N->getNumOperands()-1);
+ DecodeVPERMILPDMask(2, cast<ConstantSDNode>(ImmN)->getZExtValue(),
+ ShuffleMask);
+ break;
+ case X86ISD::VPERMILPDY:
+ ImmN = N->getOperand(N->getNumOperands()-1);
+ DecodeVPERMILPDMask(4, cast<ConstantSDNode>(ImmN)->getZExtValue(),
+ ShuffleMask);
+ break;
+ case X86ISD::VPERM2F128:
+ ImmN = N->getOperand(N->getNumOperands()-1);
+ DecodeVPERM2F128Mask(VT, cast<ConstantSDNode>(ImmN)->getZExtValue(),
+ ShuffleMask);
+ break;
+ case X86ISD::MOVDDUP:
+ case X86ISD::MOVLHPD:
+ case X86ISD::MOVLPD:
+ case X86ISD::MOVLPS:
+ case X86ISD::MOVSHDUP:
+ case X86ISD::MOVSLDUP:
+ case X86ISD::PALIGN:
+ return SDValue(); // Not yet implemented.
default:
- assert("not implemented for target shuffle node");
+ assert(0 && "unknown target shuffle node");
return SDValue();
}
@@ -4205,6 +4787,11 @@ static bool isVectorShiftLeft(ShuffleVectorSDNode *SVOp, SelectionDAG &DAG,
/// logical left or right shift of a vector.
static bool isVectorShift(ShuffleVectorSDNode *SVOp, SelectionDAG &DAG,
bool &isLeft, SDValue &ShVal, unsigned &ShAmt) {
+ // Although the logic below support any bitwidth size, there are no
+ // shift instructions which handle more than 128-bit vectors.
+ if (SVOp->getValueType(0).getSizeInBits() > 128)
+ return false;
+
if (isVectorShiftLeft(SVOp, DAG, isLeft, ShVal, ShAmt) ||
isVectorShiftRight(SVOp, DAG, isLeft, ShVal, ShAmt))
return true;
@@ -4295,6 +4882,7 @@ static SDValue LowerBuildVectorv8i16(SDValue Op, unsigned NonZeros,
static SDValue getVShift(bool isLeft, EVT VT, SDValue SrcOp,
unsigned NumBits, SelectionDAG &DAG,
const TargetLowering &TLI, DebugLoc dl) {
+ assert(VT.getSizeInBits() == 128 && "Unknown type for VShift");
EVT ShVT = MVT::v2i64;
unsigned Opc = isLeft ? X86ISD::VSHL : X86ISD::VSRL;
SrcOp = DAG.getNode(ISD::BITCAST, dl, ShVT, SrcOp);
@@ -4333,42 +4921,52 @@ X86TargetLowering::LowerAsSplatVectorLoad(SDValue SrcOp, EVT VT, DebugLoc dl,
return SDValue();
}
+ // FIXME: 256-bit vector instructions don't require a strict alignment,
+ // improve this code to support it better.
+ unsigned RequiredAlign = VT.getSizeInBits()/8;
SDValue Chain = LD->getChain();
- // Make sure the stack object alignment is at least 16.
+ // Make sure the stack object alignment is at least 16 or 32.
MachineFrameInfo *MFI = DAG.getMachineFunction().getFrameInfo();
- if (DAG.InferPtrAlignment(Ptr) < 16) {
+ if (DAG.InferPtrAlignment(Ptr) < RequiredAlign) {
if (MFI->isFixedObjectIndex(FI)) {
// Can't change the alignment. FIXME: It's possible to compute
// the exact stack offset and reference FI + adjust offset instead.
// If someone *really* cares about this. That's the way to implement it.
return SDValue();
} else {
- MFI->setObjectAlignment(FI, 16);
+ MFI->setObjectAlignment(FI, RequiredAlign);
}
}
- // (Offset % 16) must be multiple of 4. Then address is then
+ // (Offset % 16 or 32) must be multiple of 4. Then address is then
// Ptr + (Offset & ~15).
if (Offset < 0)
return SDValue();
- if ((Offset % 16) & 3)
+ if ((Offset % RequiredAlign) & 3)
return SDValue();
- int64_t StartOffset = Offset & ~15;
+ int64_t StartOffset = Offset & ~(RequiredAlign-1);
if (StartOffset)
Ptr = DAG.getNode(ISD::ADD, Ptr.getDebugLoc(), Ptr.getValueType(),
Ptr,DAG.getConstant(StartOffset, Ptr.getValueType()));
int EltNo = (Offset - StartOffset) >> 2;
- int Mask[4] = { EltNo, EltNo, EltNo, EltNo };
- EVT VT = (PVT == MVT::i32) ? MVT::v4i32 : MVT::v4f32;
- SDValue V1 = DAG.getLoad(VT, dl, Chain, Ptr,
+ int NumElems = VT.getVectorNumElements();
+
+ EVT CanonVT = VT.getSizeInBits() == 128 ? MVT::v4i32 : MVT::v8i32;
+ EVT NVT = EVT::getVectorVT(*DAG.getContext(), PVT, NumElems);
+ SDValue V1 = DAG.getLoad(NVT, dl, Chain, Ptr,
LD->getPointerInfo().getWithOffset(StartOffset),
false, false, 0);
- // Canonicalize it to a v4i32 shuffle.
- V1 = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, V1);
- return DAG.getNode(ISD::BITCAST, dl, VT,
- DAG.getVectorShuffle(MVT::v4i32, dl, V1,
- DAG.getUNDEF(MVT::v4i32),&Mask[0]));
+
+ // Canonicalize it to a v4i32 or v8i32 shuffle.
+ SmallVector<int, 8> Mask;
+ for (int i = 0; i < NumElems; ++i)
+ Mask.push_back(EltNo);
+
+ V1 = DAG.getNode(ISD::BITCAST, dl, CanonVT, V1);
+ return DAG.getNode(ISD::BITCAST, dl, NVT,
+ DAG.getVectorShuffle(CanonVT, dl, V1,
+ DAG.getUNDEF(CanonVT),&Mask[0]));
}
return SDValue();
@@ -4428,12 +5026,16 @@ static SDValue EltsFromConsecutiveLoads(EVT VT, SmallVectorImpl<SDValue> &Elts,
LDBase->getPointerInfo(),
LDBase->isVolatile(), LDBase->isNonTemporal(),
LDBase->getAlignment());
- } else if (NumElems == 4 && LastLoadedElt == 1) {
+ } else if (NumElems == 4 && LastLoadedElt == 1 &&
+ DAG.getTargetLoweringInfo().isTypeLegal(MVT::v2i64)) {
SDVTList Tys = DAG.getVTList(MVT::v2i64, MVT::Other);
SDValue Ops[] = { LDBase->getChain(), LDBase->getBasePtr() };
- SDValue ResNode = DAG.getMemIntrinsicNode(X86ISD::VZEXT_LOAD, DL, Tys,
- Ops, 2, MVT::i32,
- LDBase->getMemOperand());
+ SDValue ResNode =
+ DAG.getMemIntrinsicNode(X86ISD::VZEXT_LOAD, DL, Tys, Ops, 2, MVT::i64,
+ LDBase->getPointerInfo(),
+ LDBase->getAlignment(),
+ false/*isVolatile*/, true/*ReadMem*/,
+ false/*WriteMem*/);
return DAG.getNode(ISD::BITCAST, DL, VT, ResNode);
}
return SDValue();
@@ -4445,47 +5047,26 @@ X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const {
EVT VT = Op.getValueType();
EVT ExtVT = VT.getVectorElementType();
-
unsigned NumElems = Op.getNumOperands();
- // For AVX-length vectors, build the individual 128-bit pieces and
- // use shuffles to put them in place.
- if (VT.getSizeInBits() > 256 &&
- Subtarget->hasAVX() &&
- !ISD::isBuildVectorAllZeros(Op.getNode())) {
- SmallVector<SDValue, 8> V;
- V.resize(NumElems);
- for (unsigned i = 0; i < NumElems; ++i) {
- V[i] = Op.getOperand(i);
- }
-
- EVT HVT = EVT::getVectorVT(*DAG.getContext(), ExtVT, NumElems/2);
-
- // Build the lower subvector.
- SDValue Lower = DAG.getNode(ISD::BUILD_VECTOR, dl, HVT, &V[0], NumElems/2);
- // Build the upper subvector.
- SDValue Upper = DAG.getNode(ISD::BUILD_VECTOR, dl, HVT, &V[NumElems / 2],
- NumElems/2);
+ // Vectors containing all zeros can be matched by pxor and xorps later
+ if (ISD::isBuildVectorAllZeros(Op.getNode())) {
+ // Canonicalize this to <4 x i32> to 1) ensure the zero vectors are CSE'd
+ // and 2) ensure that i64 scalars are eliminated on x86-32 hosts.
+ if (Op.getValueType() == MVT::v4i32 ||
+ Op.getValueType() == MVT::v8i32)
+ return Op;
- return ConcatVectors(Lower, Upper, DAG);
+ return getZeroVector(Op.getValueType(), Subtarget->hasXMMInt(), DAG, dl);
}
- // All zero's:
- // - pxor (SSE2), xorps (SSE1), vpxor (128 AVX), xorp[s|d] (256 AVX)
- // All one's:
- // - pcmpeqd (SSE2 and 128 AVX), fallback to constant pools (256 AVX)
- if (ISD::isBuildVectorAllZeros(Op.getNode()) ||
- ISD::isBuildVectorAllOnes(Op.getNode())) {
- // Canonicalize this to <4 x i32> or <8 x 32> (SSE) to
- // 1) ensure the zero vectors are CSE'd, and 2) ensure that i64 scalars are
- // eliminated on x86-32 hosts.
- if (Op.getValueType() == MVT::v4i32 ||
- Op.getValueType() == MVT::v8i32)
+ // Vectors containing all ones can be matched by pcmpeqd on 128-bit width
+ // vectors or broken into v4i32 operations on 256-bit vectors.
+ if (ISD::isBuildVectorAllOnes(Op.getNode())) {
+ if (Op.getValueType() == MVT::v4i32)
return Op;
- if (ISD::isBuildVectorAllOnes(Op.getNode()))
- return getOnesVector(Op.getValueType(), DAG, dl);
- return getZeroVector(Op.getValueType(), Subtarget->hasSSE2(), DAG, dl);
+ return getOnesVector(Op.getValueType(), DAG, dl);
}
unsigned EVTBits = ExtVT.getSizeInBits();
@@ -4538,7 +5119,7 @@ X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const {
Item = DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, Item);
Item = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VecVT, Item);
Item = getShuffleVectorZeroOrUndef(Item, 0, true,
- Subtarget->hasSSE2(), DAG);
+ Subtarget->hasXMMInt(), DAG);
// Now we have our 32-bit value zero extended in the low element of
// a vector. If Idx != 0, swizzle it into place.
@@ -4566,7 +5147,7 @@ X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const {
(ExtVT == MVT::i64 && Subtarget->is64Bit())) {
Item = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT, Item);
// Turn it into a MOVL (i.e. movss, movsd, or movd) to a zero vector.
- return getShuffleVectorZeroOrUndef(Item, 0, true, Subtarget->hasSSE2(),
+ return getShuffleVectorZeroOrUndef(Item, 0, true,Subtarget->hasXMMInt(),
DAG);
} else if (ExtVT == MVT::i16 || ExtVT == MVT::i8) {
Item = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i32, Item);
@@ -4574,7 +5155,7 @@ X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const {
EVT MiddleVT = MVT::v4i32;
Item = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MiddleVT, Item);
Item = getShuffleVectorZeroOrUndef(Item, 0, true,
- Subtarget->hasSSE2(), DAG);
+ Subtarget->hasXMMInt(), DAG);
return DAG.getNode(ISD::BITCAST, dl, VT, Item);
}
}
@@ -4603,7 +5184,7 @@ X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const {
// Turn it into a shuffle of zero and zero-extended scalar to vector.
Item = getShuffleVectorZeroOrUndef(Item, 0, NumZero > 0,
- Subtarget->hasSSE2(), DAG);
+ Subtarget->hasXMMInt(), DAG);
SmallVector<int, 8> MaskVec;
for (unsigned i = 0; i < NumElems; i++)
MaskVec.push_back(i == Idx ? 0 : 1);
@@ -4631,6 +5212,27 @@ X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const {
if (IsAllConstants)
return SDValue();
+ // For AVX-length vectors, build the individual 128-bit pieces and use
+ // shuffles to put them in place.
+ if (VT.getSizeInBits() == 256 && !ISD::isBuildVectorAllZeros(Op.getNode())) {
+ SmallVector<SDValue, 32> V;
+ for (unsigned i = 0; i < NumElems; ++i)
+ V.push_back(Op.getOperand(i));
+
+ EVT HVT = EVT::getVectorVT(*DAG.getContext(), ExtVT, NumElems/2);
+
+ // Build both the lower and upper subvector.
+ SDValue Lower = DAG.getNode(ISD::BUILD_VECTOR, dl, HVT, &V[0], NumElems/2);
+ SDValue Upper = DAG.getNode(ISD::BUILD_VECTOR, dl, HVT, &V[NumElems / 2],
+ NumElems/2);
+
+ // Recreate the wider vector with the lower and upper part.
+ SDValue Vec = Insert128BitVector(DAG.getNode(ISD::UNDEF, dl, VT), Lower,
+ DAG.getConstant(0, MVT::i32), DAG, dl);
+ return Insert128BitVector(Vec, Upper, DAG.getConstant(NumElems/2, MVT::i32),
+ DAG, dl);
+ }
+
// Let legalizer expand 2-wide build_vectors.
if (EVTBits == 64) {
if (NumNonZero == 1) {
@@ -4639,7 +5241,7 @@ X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const {
SDValue V2 = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT,
Op.getOperand(Idx));
return getShuffleVectorZeroOrUndef(V2, Idx, true,
- Subtarget->hasSSE2(), DAG);
+ Subtarget->hasXMMInt(), DAG);
}
return SDValue();
}
@@ -4664,7 +5266,7 @@ X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const {
for (unsigned i = 0; i < 4; ++i) {
bool isZero = !(NonZeros & (1 << i));
if (isZero)
- V[i] = getZeroVector(VT, Subtarget->hasSSE2(), DAG, dl);
+ V[i] = getZeroVector(VT, Subtarget->hasXMMInt(), DAG, dl);
else
V[i] = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT, Op.getOperand(i));
}
@@ -4708,7 +5310,7 @@ X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const {
return LD;
// For SSE 4.1, use insertps to put the high elements into the low element.
- if (getSubtarget()->hasSSE41()) {
+ if (getSubtarget()->hasSSE41() || getSubtarget()->hasAVX()) {
SDValue Result;
if (Op.getOperand(0).getOpcode() != ISD::UNDEF)
Result = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT, Op.getOperand(0));
@@ -4758,13 +5360,12 @@ X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const {
return SDValue();
}
-SDValue
-X86TargetLowering::LowerCONCAT_VECTORS(SDValue Op, SelectionDAG &DAG) const {
- // We support concatenate two MMX registers and place them in a MMX
- // register. This is better than doing a stack convert.
+// LowerMMXCONCAT_VECTORS - We support concatenate two MMX registers and place
+// them in a MMX register. This is better than doing a stack convert.
+static SDValue LowerMMXCONCAT_VECTORS(SDValue Op, SelectionDAG &DAG) {
DebugLoc dl = Op.getDebugLoc();
EVT ResVT = Op.getValueType();
- assert(Op.getNumOperands() == 2);
+
assert(ResVT == MVT::v2i64 || ResVT == MVT::v4i32 ||
ResVT == MVT::v8i16 || ResVT == MVT::v16i8);
int Mask[2];
@@ -4785,6 +5386,42 @@ X86TargetLowering::LowerCONCAT_VECTORS(SDValue Op, SelectionDAG &DAG) const {
return DAG.getNode(ISD::BITCAST, dl, ResVT, VecOp);
}
+// LowerAVXCONCAT_VECTORS - 256-bit AVX can use the vinsertf128 instruction
+// to create 256-bit vectors from two other 128-bit ones.
+static SDValue LowerAVXCONCAT_VECTORS(SDValue Op, SelectionDAG &DAG) {
+ DebugLoc dl = Op.getDebugLoc();
+ EVT ResVT = Op.getValueType();
+
+ assert(ResVT.getSizeInBits() == 256 && "Value type must be 256-bit wide");
+
+ SDValue V1 = Op.getOperand(0);
+ SDValue V2 = Op.getOperand(1);
+ unsigned NumElems = ResVT.getVectorNumElements();
+
+ SDValue V = Insert128BitVector(DAG.getNode(ISD::UNDEF, dl, ResVT), V1,
+ DAG.getConstant(0, MVT::i32), DAG, dl);
+ return Insert128BitVector(V, V2, DAG.getConstant(NumElems/2, MVT::i32),
+ DAG, dl);
+}
+
+SDValue
+X86TargetLowering::LowerCONCAT_VECTORS(SDValue Op, SelectionDAG &DAG) const {
+ EVT ResVT = Op.getValueType();
+
+ assert(Op.getNumOperands() == 2);
+ assert((ResVT.getSizeInBits() == 128 || ResVT.getSizeInBits() == 256) &&
+ "Unsupported CONCAT_VECTORS for value type");
+
+ // We support concatenate two MMX registers and place them in a MMX register.
+ // This is better than doing a stack convert.
+ if (ResVT.is128BitVector())
+ return LowerMMXCONCAT_VECTORS(Op, DAG);
+
+ // 256-bit AVX can use the vinsertf128 instruction to create 256-bit vectors
+ // from two other 128-bit ones.
+ return LowerAVXCONCAT_VECTORS(Op, DAG);
+}
+
// v8i16 shuffles - Prefer shuffles in the following order:
// 1. [all] pshuflw, pshufhw, optional move
// 2. [ssse3] 1 x pshufb
@@ -4844,7 +5481,7 @@ X86TargetLowering::LowerVECTOR_SHUFFLEv8i16(SDValue Op,
// quads, disable the next transformation since it does not help SSSE3.
bool V1Used = InputQuads[0] || InputQuads[1];
bool V2Used = InputQuads[2] || InputQuads[3];
- if (Subtarget->hasSSSE3()) {
+ if (Subtarget->hasSSSE3() || Subtarget->hasAVX()) {
if (InputQuads.count() == 2 && V1Used && V2Used) {
BestLoQuad = InputQuads.find_first();
BestHiQuad = InputQuads.find_next(BestLoQuad);
@@ -4917,7 +5554,7 @@ X86TargetLowering::LowerVECTOR_SHUFFLEv8i16(SDValue Op,
// If we have SSSE3, and all words of the result are from 1 input vector,
// case 2 is generated, otherwise case 3 is generated. If no SSSE3
// is present, fall back to case 4.
- if (Subtarget->hasSSSE3()) {
+ if (Subtarget->hasSSSE3() || Subtarget->hasAVX()) {
SmallVector<SDValue,16> pshufbMask;
// If we have elements from both input vectors, set the high bit of the
@@ -4985,7 +5622,8 @@ X86TargetLowering::LowerVECTOR_SHUFFLEv8i16(SDValue Op,
NewV = DAG.getVectorShuffle(MVT::v8i16, dl, NewV, DAG.getUNDEF(MVT::v8i16),
&MaskV[0]);
- if (NewV.getOpcode() == ISD::VECTOR_SHUFFLE && Subtarget->hasSSSE3())
+ if (NewV.getOpcode() == ISD::VECTOR_SHUFFLE &&
+ (Subtarget->hasSSSE3() || Subtarget->hasAVX()))
NewV = getTargetShuffleNode(X86ISD::PSHUFLW, dl, MVT::v8i16,
NewV.getOperand(0),
X86::getShufflePSHUFLWImmediate(NewV.getNode()),
@@ -5013,7 +5651,8 @@ X86TargetLowering::LowerVECTOR_SHUFFLEv8i16(SDValue Op,
NewV = DAG.getVectorShuffle(MVT::v8i16, dl, NewV, DAG.getUNDEF(MVT::v8i16),
&MaskV[0]);
- if (NewV.getOpcode() == ISD::VECTOR_SHUFFLE && Subtarget->hasSSSE3())
+ if (NewV.getOpcode() == ISD::VECTOR_SHUFFLE &&
+ (Subtarget->hasSSSE3() || Subtarget->hasAVX()))
NewV = getTargetShuffleNode(X86ISD::PSHUFHW, dl, MVT::v8i16,
NewV.getOperand(0),
X86::getShufflePSHUFHWImmediate(NewV.getNode()),
@@ -5079,7 +5718,7 @@ SDValue LowerVECTOR_SHUFFLEv16i8(ShuffleVectorSDNode *SVOp,
}
// If SSSE3, use 1 pshufb instruction per vector with elements in the result.
- if (TLI.getSubtarget()->hasSSSE3()) {
+ if (TLI.getSubtarget()->hasSSSE3() || TLI.getSubtarget()->hasAVX()) {
SmallVector<SDValue,16> pshufbMask;
// If all result elements are from one input vector, then only translate
@@ -5276,15 +5915,109 @@ static SDValue getVZextMovL(EVT VT, EVT OpVT,
OpVT, SrcOp)));
}
-/// LowerVECTOR_SHUFFLE_4wide - Handle all 4 wide cases with a number of
-/// shuffles.
+/// areShuffleHalvesWithinDisjointLanes - Check whether each half of a vector
+/// shuffle node referes to only one lane in the sources.
+static bool areShuffleHalvesWithinDisjointLanes(ShuffleVectorSDNode *SVOp) {
+ EVT VT = SVOp->getValueType(0);
+ int NumElems = VT.getVectorNumElements();
+ int HalfSize = NumElems/2;
+ SmallVector<int, 16> M;
+ SVOp->getMask(M);
+ bool MatchA = false, MatchB = false;
+
+ for (int l = 0; l < NumElems*2; l += HalfSize) {
+ if (isUndefOrInRange(M, 0, HalfSize, l, l+HalfSize)) {
+ MatchA = true;
+ break;
+ }
+ }
+
+ for (int l = 0; l < NumElems*2; l += HalfSize) {
+ if (isUndefOrInRange(M, HalfSize, HalfSize, l, l+HalfSize)) {
+ MatchB = true;
+ break;
+ }
+ }
+
+ return MatchA && MatchB;
+}
+
+/// LowerVECTOR_SHUFFLE_256 - Handle all 256-bit wide vectors shuffles
+/// which could not be matched by any known target speficic shuffle
+static SDValue
+LowerVECTOR_SHUFFLE_256(ShuffleVectorSDNode *SVOp, SelectionDAG &DAG) {
+ if (areShuffleHalvesWithinDisjointLanes(SVOp)) {
+ // If each half of a vector shuffle node referes to only one lane in the
+ // source vectors, extract each used 128-bit lane and shuffle them using
+ // 128-bit shuffles. Then, concatenate the results. Otherwise leave
+ // the work to the legalizer.
+ DebugLoc dl = SVOp->getDebugLoc();
+ EVT VT = SVOp->getValueType(0);
+ int NumElems = VT.getVectorNumElements();
+ int HalfSize = NumElems/2;
+
+ // Extract the reference for each half
+ int FstVecExtractIdx = 0, SndVecExtractIdx = 0;
+ int FstVecOpNum = 0, SndVecOpNum = 0;
+ for (int i = 0; i < HalfSize; ++i) {
+ int Elt = SVOp->getMaskElt(i);
+ if (SVOp->getMaskElt(i) < 0)
+ continue;
+ FstVecOpNum = Elt/NumElems;
+ FstVecExtractIdx = Elt % NumElems < HalfSize ? 0 : HalfSize;
+ break;
+ }
+ for (int i = HalfSize; i < NumElems; ++i) {
+ int Elt = SVOp->getMaskElt(i);
+ if (SVOp->getMaskElt(i) < 0)
+ continue;
+ SndVecOpNum = Elt/NumElems;
+ SndVecExtractIdx = Elt % NumElems < HalfSize ? 0 : HalfSize;
+ break;
+ }
+
+ // Extract the subvectors
+ SDValue V1 = Extract128BitVector(SVOp->getOperand(FstVecOpNum),
+ DAG.getConstant(FstVecExtractIdx, MVT::i32), DAG, dl);
+ SDValue V2 = Extract128BitVector(SVOp->getOperand(SndVecOpNum),
+ DAG.getConstant(SndVecExtractIdx, MVT::i32), DAG, dl);
+
+ // Generate 128-bit shuffles
+ SmallVector<int, 16> MaskV1, MaskV2;
+ for (int i = 0; i < HalfSize; ++i) {
+ int Elt = SVOp->getMaskElt(i);
+ MaskV1.push_back(Elt < 0 ? Elt : Elt % HalfSize);
+ }
+ for (int i = HalfSize; i < NumElems; ++i) {
+ int Elt = SVOp->getMaskElt(i);
+ MaskV2.push_back(Elt < 0 ? Elt : Elt % HalfSize);
+ }
+
+ EVT NVT = V1.getValueType();
+ V1 = DAG.getVectorShuffle(NVT, dl, V1, DAG.getUNDEF(NVT), &MaskV1[0]);
+ V2 = DAG.getVectorShuffle(NVT, dl, V2, DAG.getUNDEF(NVT), &MaskV2[0]);
+
+ // Concatenate the result back
+ SDValue V = Insert128BitVector(DAG.getNode(ISD::UNDEF, dl, VT), V1,
+ DAG.getConstant(0, MVT::i32), DAG, dl);
+ return Insert128BitVector(V, V2, DAG.getConstant(NumElems/2, MVT::i32),
+ DAG, dl);
+ }
+
+ return SDValue();
+}
+
+/// LowerVECTOR_SHUFFLE_128v4 - Handle all 128-bit wide vectors with
+/// 4 elements, and match them with several different shuffle types.
static SDValue
-LowerVECTOR_SHUFFLE_4wide(ShuffleVectorSDNode *SVOp, SelectionDAG &DAG) {
+LowerVECTOR_SHUFFLE_128v4(ShuffleVectorSDNode *SVOp, SelectionDAG &DAG) {
SDValue V1 = SVOp->getOperand(0);
SDValue V2 = SVOp->getOperand(1);
DebugLoc dl = SVOp->getDebugLoc();
EVT VT = SVOp->getValueType(0);
+ assert(VT.getSizeInBits() == 128 && "Unsupported vector size");
+
SmallVector<std::pair<int, int>, 8> Locs;
Locs.resize(4);
SmallVector<int, 8> Mask1(4U, -1);
@@ -5542,18 +6275,21 @@ SDValue getMOVDDup(SDValue &Op, DebugLoc &dl, SDValue V1, SelectionDAG &DAG) {
static
SDValue getMOVLowToHigh(SDValue &Op, DebugLoc &dl, SelectionDAG &DAG,
- bool HasSSE2) {
+ bool HasXMMInt) {
SDValue V1 = Op.getOperand(0);
SDValue V2 = Op.getOperand(1);
EVT VT = Op.getValueType();
assert(VT != MVT::v2i64 && "unsupported shuffle type");
- if (HasSSE2 && VT == MVT::v2f64)
+ if (HasXMMInt && VT == MVT::v2f64)
return getTargetShuffleNode(X86ISD::MOVLHPD, dl, VT, V1, V2, DAG);
- // v4f32 or v4i32
- return getTargetShuffleNode(X86ISD::MOVLHPS, dl, VT, V1, V2, DAG);
+ // v4f32 or v4i32: canonizalized to v4f32 (which is legal for SSE1)
+ return DAG.getNode(ISD::BITCAST, dl, VT,
+ getTargetShuffleNode(X86ISD::MOVLHPS, dl, MVT::v4f32,
+ DAG.getNode(ISD::BITCAST, dl, MVT::v4f32, V1),
+ DAG.getNode(ISD::BITCAST, dl, MVT::v4f32, V2), DAG));
}
static
@@ -5572,8 +6308,24 @@ SDValue getMOVHighToLow(SDValue &Op, DebugLoc &dl, SelectionDAG &DAG) {
return getTargetShuffleNode(X86ISD::MOVHLPS, dl, VT, V1, V2, DAG);
}
+static inline unsigned getSHUFPOpcode(EVT VT) {
+ switch(VT.getSimpleVT().SimpleTy) {
+ case MVT::v8i32: // Use fp unit for int unpack.
+ case MVT::v8f32:
+ case MVT::v4i32: // Use fp unit for int unpack.
+ case MVT::v4f32: return X86ISD::SHUFPS;
+ case MVT::v4i64: // Use fp unit for int unpack.
+ case MVT::v4f64:
+ case MVT::v2i64: // Use fp unit for int unpack.
+ case MVT::v2f64: return X86ISD::SHUFPD;
+ default:
+ llvm_unreachable("Unknown type for shufp*");
+ }
+ return 0;
+}
+
static
-SDValue getMOVLP(SDValue &Op, DebugLoc &dl, SelectionDAG &DAG, bool HasSSE2) {
+SDValue getMOVLP(SDValue &Op, DebugLoc &dl, SelectionDAG &DAG, bool HasXMMInt) {
SDValue V1 = Op.getOperand(0);
SDValue V2 = Op.getOperand(1);
EVT VT = Op.getValueType();
@@ -5602,7 +6354,7 @@ SDValue getMOVLP(SDValue &Op, DebugLoc &dl, SelectionDAG &DAG, bool HasSSE2) {
CanFoldLoad = false;
if (CanFoldLoad) {
- if (HasSSE2 && NumElems == 2)
+ if (HasXMMInt && NumElems == 2)
return getTargetShuffleNode(X86ISD::MOVLPD, dl, VT, V1, V2, DAG);
if (NumElems == 4)
@@ -5616,28 +6368,30 @@ SDValue getMOVLP(SDValue &Op, DebugLoc &dl, SelectionDAG &DAG, bool HasSSE2) {
// this is horrible, but will stay like this until we move all shuffle
// matching to x86 specific nodes. Note that for the 1st condition all
// types are matched with movsd.
- if ((HasSSE2 && NumElems == 2) || !X86::isMOVLMask(SVOp))
- return getTargetShuffleNode(X86ISD::MOVSD, dl, VT, V1, V2, DAG);
- else if (HasSSE2)
+ if (HasXMMInt) {
+ // FIXME: isMOVLMask should be checked and matched before getMOVLP,
+ // as to remove this logic from here, as much as possible
+ if (NumElems == 2 || !X86::isMOVLMask(SVOp))
+ return getTargetShuffleNode(X86ISD::MOVSD, dl, VT, V1, V2, DAG);
return getTargetShuffleNode(X86ISD::MOVSS, dl, VT, V1, V2, DAG);
-
+ }
assert(VT != MVT::v4i32 && "unsupported shuffle type");
// Invert the operand order and use SHUFPS to match it.
- return getTargetShuffleNode(X86ISD::SHUFPS, dl, VT, V2, V1,
+ return getTargetShuffleNode(getSHUFPOpcode(VT), dl, VT, V2, V1,
X86::getShuffleSHUFImmediate(SVOp), DAG);
}
-static inline unsigned getUNPCKLOpcode(EVT VT, const X86Subtarget *Subtarget) {
+static inline unsigned getUNPCKLOpcode(EVT VT) {
switch(VT.getSimpleVT().SimpleTy) {
case MVT::v4i32: return X86ISD::PUNPCKLDQ;
case MVT::v2i64: return X86ISD::PUNPCKLQDQ;
- case MVT::v4f32:
- return Subtarget->hasAVX() ? X86ISD::VUNPCKLPS : X86ISD::UNPCKLPS;
- case MVT::v2f64:
- return Subtarget->hasAVX() ? X86ISD::VUNPCKLPD : X86ISD::UNPCKLPD;
+ case MVT::v4f32: return X86ISD::UNPCKLPS;
+ case MVT::v2f64: return X86ISD::UNPCKLPD;
+ case MVT::v8i32: // Use fp unit for int unpack.
case MVT::v8f32: return X86ISD::VUNPCKLPSY;
+ case MVT::v4i64: // Use fp unit for int unpack.
case MVT::v4f64: return X86ISD::VUNPCKLPDY;
case MVT::v16i8: return X86ISD::PUNPCKLBW;
case MVT::v8i16: return X86ISD::PUNPCKLWD;
@@ -5653,6 +6407,10 @@ static inline unsigned getUNPCKHOpcode(EVT VT) {
case MVT::v2i64: return X86ISD::PUNPCKHQDQ;
case MVT::v4f32: return X86ISD::UNPCKHPS;
case MVT::v2f64: return X86ISD::UNPCKHPD;
+ case MVT::v8i32: // Use fp unit for int unpack.
+ case MVT::v8f32: return X86ISD::VUNPCKHPSY;
+ case MVT::v4i64: // Use fp unit for int unpack.
+ case MVT::v4f64: return X86ISD::VUNPCKHPDY;
case MVT::v16i8: return X86ISD::PUNPCKHBW;
case MVT::v8i16: return X86ISD::PUNPCKHWD;
default:
@@ -5661,6 +6419,68 @@ static inline unsigned getUNPCKHOpcode(EVT VT) {
return 0;
}
+static inline unsigned getVPERMILOpcode(EVT VT) {
+ switch(VT.getSimpleVT().SimpleTy) {
+ case MVT::v4i32:
+ case MVT::v4f32: return X86ISD::VPERMILPS;
+ case MVT::v2i64:
+ case MVT::v2f64: return X86ISD::VPERMILPD;
+ case MVT::v8i32:
+ case MVT::v8f32: return X86ISD::VPERMILPSY;
+ case MVT::v4i64:
+ case MVT::v4f64: return X86ISD::VPERMILPDY;
+ default:
+ llvm_unreachable("Unknown type for vpermil");
+ }
+ return 0;
+}
+
+/// isVectorBroadcast - Check if the node chain is suitable to be xformed to
+/// a vbroadcast node. The nodes are suitable whenever we can fold a load coming
+/// from a 32 or 64 bit scalar. Update Op to the desired load to be folded.
+static bool isVectorBroadcast(SDValue &Op) {
+ EVT VT = Op.getValueType();
+ bool Is256 = VT.getSizeInBits() == 256;
+
+ assert((VT.getSizeInBits() == 128 || Is256) &&
+ "Unsupported type for vbroadcast node");
+
+ SDValue V = Op;
+ if (V.hasOneUse() && V.getOpcode() == ISD::BITCAST)
+ V = V.getOperand(0);
+
+ if (Is256 && !(V.hasOneUse() &&
+ V.getOpcode() == ISD::INSERT_SUBVECTOR &&
+ V.getOperand(0).getOpcode() == ISD::UNDEF))
+ return false;
+
+ if (Is256)
+ V = V.getOperand(1);
+
+ if (!V.hasOneUse())
+ return false;
+
+ // Check the source scalar_to_vector type. 256-bit broadcasts are
+ // supported for 32/64-bit sizes, while 128-bit ones are only supported
+ // for 32-bit scalars.
+ if (V.getOpcode() != ISD::SCALAR_TO_VECTOR)
+ return false;
+
+ unsigned ScalarSize = V.getOperand(0).getValueType().getSizeInBits();
+ if (ScalarSize != 32 && ScalarSize != 64)
+ return false;
+ if (!Is256 && ScalarSize == 64)
+ return false;
+
+ V = V.getOperand(0);
+ if (!MayFoldLoad(V))
+ return false;
+
+ // Return the load node
+ Op = V;
+ return true;
+}
+
static
SDValue NormalizeVectorShuffle(SDValue Op, SelectionDAG &DAG,
const TargetLowering &TLI,
@@ -5672,23 +6492,29 @@ SDValue NormalizeVectorShuffle(SDValue Op, SelectionDAG &DAG,
SDValue V2 = Op.getOperand(1);
if (isZeroShuffle(SVOp))
- return getZeroVector(VT, Subtarget->hasSSE2(), DAG, dl);
+ return getZeroVector(VT, Subtarget->hasXMMInt(), DAG, dl);
// Handle splat operations
if (SVOp->isSplat()) {
- // Special case, this is the only place now where it's
- // allowed to return a vector_shuffle operation without
- // using a target specific node, because *hopefully* it
- // will be optimized away by the dag combiner.
- if (VT.getVectorNumElements() <= 4 &&
- CanXFormVExtractWithShuffleIntoLoad(Op, DAG, TLI))
+ unsigned NumElem = VT.getVectorNumElements();
+ int Size = VT.getSizeInBits();
+ // Special case, this is the only place now where it's allowed to return
+ // a vector_shuffle operation without using a target specific node, because
+ // *hopefully* it will be optimized away by the dag combiner. FIXME: should
+ // this be moved to DAGCombine instead?
+ if (NumElem <= 4 && CanXFormVExtractWithShuffleIntoLoad(Op, DAG, TLI))
return Op;
- // Handle splats by matching through known masks
- if (VT.getVectorNumElements() <= 4)
+ // Use vbroadcast whenever the splat comes from a foldable load
+ if (Subtarget->hasAVX() && isVectorBroadcast(V1))
+ return DAG.getNode(X86ISD::VBROADCAST, dl, VT, V1);
+
+ // Handle splats by matching through known shuffle masks
+ if ((Size == 128 && NumElem <= 4) ||
+ (Size == 256 && NumElem < 8))
return SDValue();
- // Canonicalize all of the remaining to v4f32.
+ // All remaning splats are promoted to target supported vector shuffles.
return PromoteSplat(SVOp, DAG);
}
@@ -5698,7 +6524,8 @@ SDValue NormalizeVectorShuffle(SDValue Op, SelectionDAG &DAG,
SDValue NewOp = RewriteAsNarrowerShuffle(SVOp, DAG, dl);
if (NewOp.getNode())
return DAG.getNode(ISD::BITCAST, dl, VT, NewOp);
- } else if ((VT == MVT::v4i32 || (VT == MVT::v4f32 && Subtarget->hasSSE2()))) {
+ } else if ((VT == MVT::v4i32 ||
+ (VT == MVT::v4f32 && Subtarget->hasXMMInt()))) {
// FIXME: Figure out a cleaner way to do this.
// Try to make use of movq to zero out the top part.
if (ISD::isBuildVectorAllZeros(V2.getNode())) {
@@ -5731,9 +6558,7 @@ X86TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const {
bool V2IsUndef = V2.getOpcode() == ISD::UNDEF;
bool V1IsSplat = false;
bool V2IsSplat = false;
- bool HasSSE2 = Subtarget->hasSSE2() || Subtarget->hasAVX();
- bool HasSSE3 = Subtarget->hasSSE3() || Subtarget->hasAVX();
- bool HasSSSE3 = Subtarget->hasSSSE3() || Subtarget->hasAVX();
+ bool HasXMMInt = Subtarget->hasXMMInt();
MachineFunction &MF = DAG.getMachineFunction();
bool OptForSize = MF.getFunction()->hasFnAttr(Attribute::OptimizeForSize);
@@ -5765,21 +6590,20 @@ X86TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const {
// NOTE: isPSHUFDMask can also match both masks below (unpckl_undef and
// unpckh_undef). Only use pshufd if speed is more important than size.
if (OptForSize && X86::isUNPCKL_v_undef_Mask(SVOp))
- if (VT != MVT::v2i64 && VT != MVT::v2f64)
- return getTargetShuffleNode(getUNPCKLOpcode(VT, getSubtarget()), dl, VT, V1, V1, DAG);
+ return getTargetShuffleNode(getUNPCKLOpcode(VT), dl, VT, V1, V1, DAG);
if (OptForSize && X86::isUNPCKH_v_undef_Mask(SVOp))
- if (VT != MVT::v2i64 && VT != MVT::v2f64)
- return getTargetShuffleNode(getUNPCKHOpcode(VT), dl, VT, V1, V1, DAG);
+ return getTargetShuffleNode(getUNPCKHOpcode(VT), dl, VT, V1, V1, DAG);
- if (X86::isMOVDDUPMask(SVOp) && HasSSE3 && V2IsUndef &&
- RelaxedMayFoldVectorLoad(V1))
+ if (X86::isMOVDDUPMask(SVOp) &&
+ (Subtarget->hasSSE3() || Subtarget->hasAVX()) &&
+ V2IsUndef && RelaxedMayFoldVectorLoad(V1))
return getMOVDDup(Op, dl, V1, DAG);
if (X86::isMOVHLPS_v_undef_Mask(SVOp))
return getMOVHighToLow(Op, dl, DAG);
// Use to match splats
- if (HasSSE2 && X86::isUNPCKHMask(SVOp) && V2IsUndef &&
+ if (HasXMMInt && X86::isUNPCKHMask(SVOp) && V2IsUndef &&
(VT == MVT::v2f64 || VT == MVT::v2i64))
return getTargetShuffleNode(getUNPCKHOpcode(VT), dl, VT, V1, V1, DAG);
@@ -5792,24 +6616,19 @@ X86TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const {
unsigned TargetMask = X86::getShuffleSHUFImmediate(SVOp);
- if (HasSSE2 && (VT == MVT::v4f32 || VT == MVT::v4i32))
+ if (HasXMMInt && (VT == MVT::v4f32 || VT == MVT::v4i32))
return getTargetShuffleNode(X86ISD::PSHUFD, dl, VT, V1, TargetMask, DAG);
- if (HasSSE2 && (VT == MVT::v2i64 || VT == MVT::v2f64))
- return getTargetShuffleNode(X86ISD::SHUFPD, dl, VT, V1, V1,
- TargetMask, DAG);
-
- if (VT == MVT::v4f32)
- return getTargetShuffleNode(X86ISD::SHUFPS, dl, VT, V1, V1,
- TargetMask, DAG);
+ return getTargetShuffleNode(getSHUFPOpcode(VT), dl, VT, V1, V1,
+ TargetMask, DAG);
}
// Check if this can be converted into a logical shift.
bool isLeft = false;
unsigned ShAmt = 0;
SDValue ShVal;
- bool isShift = getSubtarget()->hasSSE2() &&
- isVectorShift(SVOp, DAG, isLeft, ShVal, ShAmt);
+ bool isShift = getSubtarget()->hasXMMInt() &&
+ isVectorShift(SVOp, DAG, isLeft, ShVal, ShAmt);
if (isShift && ShVal.hasOneUse()) {
// If the shifted value has multiple uses, it may be cheaper to use
// v_set0 + movlhps or movhlps, etc.
@@ -5824,7 +6643,7 @@ X86TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const {
if (ISD::isBuildVectorAllZeros(V1.getNode()))
return getVZextMovL(VT, VT, V2, DAG, Subtarget, dl);
if (!X86::isMOVLPMask(SVOp)) {
- if (HasSSE2 && (VT == MVT::v2i64 || VT == MVT::v2f64))
+ if (HasXMMInt && (VT == MVT::v2i64 || VT == MVT::v2f64))
return getTargetShuffleNode(X86ISD::MOVSD, dl, VT, V1, V2, DAG);
if (VT == MVT::v4i32 || VT == MVT::v4f32)
@@ -5834,19 +6653,19 @@ X86TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const {
// FIXME: fold these into legal mask.
if (X86::isMOVLHPSMask(SVOp) && !X86::isUNPCKLMask(SVOp))
- return getMOVLowToHigh(Op, dl, DAG, HasSSE2);
+ return getMOVLowToHigh(Op, dl, DAG, HasXMMInt);
if (X86::isMOVHLPSMask(SVOp))
return getMOVHighToLow(Op, dl, DAG);
- if (X86::isMOVSHDUPMask(SVOp) && HasSSE3 && V2IsUndef && NumElems == 4)
+ if (X86::isMOVSHDUPMask(SVOp, Subtarget))
return getTargetShuffleNode(X86ISD::MOVSHDUP, dl, VT, V1, DAG);
- if (X86::isMOVSLDUPMask(SVOp) && HasSSE3 && V2IsUndef && NumElems == 4)
+ if (X86::isMOVSLDUPMask(SVOp, Subtarget))
return getTargetShuffleNode(X86ISD::MOVSLDUP, dl, VT, V1, DAG);
if (X86::isMOVLPMask(SVOp))
- return getMOVLP(Op, dl, DAG, HasSSE2);
+ return getMOVLP(Op, dl, DAG, HasXMMInt);
if (ShouldXformToMOVHLPS(SVOp) ||
ShouldXformToMOVLP(V1.getNode(), V2.getNode(), SVOp))
@@ -5887,8 +6706,7 @@ X86TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const {
}
if (X86::isUNPCKLMask(SVOp))
- return getTargetShuffleNode(getUNPCKLOpcode(VT, getSubtarget()),
- dl, VT, V1, V2, DAG);
+ return getTargetShuffleNode(getUNPCKLOpcode(VT), dl, VT, V1, V2, DAG);
if (X86::isUNPCKHMask(SVOp))
return getTargetShuffleNode(getUNPCKHOpcode(VT), dl, VT, V1, V2, DAG);
@@ -5915,8 +6733,7 @@ X86TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const {
ShuffleVectorSDNode *NewSVOp = cast<ShuffleVectorSDNode>(NewOp);
if (X86::isUNPCKLMask(NewSVOp))
- return getTargetShuffleNode(getUNPCKLOpcode(VT, getSubtarget()),
- dl, VT, V2, V1, DAG);
+ return getTargetShuffleNode(getUNPCKLOpcode(VT), dl, VT, V2, V1, DAG);
if (X86::isUNPCKHMask(NewSVOp))
return getTargetShuffleNode(getUNPCKHOpcode(VT), dl, VT, V2, V1, DAG);
@@ -5932,18 +6749,15 @@ X86TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const {
SmallVector<int, 16> M;
SVOp->getMask(M);
- if (isPALIGNRMask(M, VT, HasSSSE3))
+ if (isPALIGNRMask(M, VT, Subtarget->hasSSSE3() || Subtarget->hasAVX()))
return getTargetShuffleNode(X86ISD::PALIGN, dl, VT, V1, V2,
X86::getShufflePALIGNRImmediate(SVOp),
DAG);
if (ShuffleVectorSDNode::isSplatMask(&M[0], VT) &&
SVOp->getSplatIndex() == 0 && V2IsUndef) {
- if (VT == MVT::v2f64) {
- X86ISD::NodeType Opcode =
- getSubtarget()->hasAVX() ? X86ISD::VUNPCKLPD : X86ISD::UNPCKLPD;
- return getTargetShuffleNode(Opcode, dl, VT, V1, V1, DAG);
- }
+ if (VT == MVT::v2f64)
+ return getTargetShuffleNode(X86ISD::UNPCKLPD, dl, VT, V1, V1, DAG);
if (VT == MVT::v2i64)
return getTargetShuffleNode(X86ISD::PUNPCKLQDQ, dl, VT, V1, V1, DAG);
}
@@ -5958,23 +6772,54 @@ X86TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const {
X86::getShufflePSHUFLWImmediate(SVOp),
DAG);
- if (isSHUFPMask(M, VT)) {
- unsigned TargetMask = X86::getShuffleSHUFImmediate(SVOp);
- if (VT == MVT::v4f32 || VT == MVT::v4i32)
- return getTargetShuffleNode(X86ISD::SHUFPS, dl, VT, V1, V2,
- TargetMask, DAG);
- if (VT == MVT::v2f64 || VT == MVT::v2i64)
- return getTargetShuffleNode(X86ISD::SHUFPD, dl, VT, V1, V2,
- TargetMask, DAG);
- }
+ if (isSHUFPMask(M, VT))
+ return getTargetShuffleNode(getSHUFPOpcode(VT), dl, VT, V1, V2,
+ X86::getShuffleSHUFImmediate(SVOp), DAG);
if (X86::isUNPCKL_v_undef_Mask(SVOp))
- if (VT != MVT::v2i64 && VT != MVT::v2f64)
- return getTargetShuffleNode(getUNPCKLOpcode(VT, getSubtarget()),
- dl, VT, V1, V1, DAG);
+ return getTargetShuffleNode(getUNPCKLOpcode(VT), dl, VT, V1, V1, DAG);
if (X86::isUNPCKH_v_undef_Mask(SVOp))
- if (VT != MVT::v2i64 && VT != MVT::v2f64)
- return getTargetShuffleNode(getUNPCKHOpcode(VT), dl, VT, V1, V1, DAG);
+ return getTargetShuffleNode(getUNPCKHOpcode(VT), dl, VT, V1, V1, DAG);
+
+ //===--------------------------------------------------------------------===//
+ // Generate target specific nodes for 128 or 256-bit shuffles only
+ // supported in the AVX instruction set.
+ //
+
+ // Handle VMOVDDUPY permutations
+ if (isMOVDDUPYMask(SVOp, Subtarget))
+ return getTargetShuffleNode(X86ISD::MOVDDUP, dl, VT, V1, DAG);
+
+ // Handle VPERMILPS* permutations
+ if (isVPERMILPSMask(M, VT, Subtarget))
+ return getTargetShuffleNode(getVPERMILOpcode(VT), dl, VT, V1,
+ getShuffleVPERMILPSImmediate(SVOp), DAG);
+
+ // Handle VPERMILPD* permutations
+ if (isVPERMILPDMask(M, VT, Subtarget))
+ return getTargetShuffleNode(getVPERMILOpcode(VT), dl, VT, V1,
+ getShuffleVPERMILPDImmediate(SVOp), DAG);
+
+ // Handle VPERM2F128 permutations
+ if (isVPERM2F128Mask(M, VT, Subtarget))
+ return getTargetShuffleNode(X86ISD::VPERM2F128, dl, VT, V1, V2,
+ getShuffleVPERM2F128Immediate(SVOp), DAG);
+
+ // Handle VSHUFPSY permutations
+ if (isVSHUFPSYMask(M, VT, Subtarget))
+ return getTargetShuffleNode(getSHUFPOpcode(VT), dl, VT, V1, V2,
+ getShuffleVSHUFPSYImmediate(SVOp), DAG);
+
+ // Handle VSHUFPDY permutations
+ if (isVSHUFPDYMask(M, VT, Subtarget))
+ return getTargetShuffleNode(getSHUFPOpcode(VT), dl, VT, V1, V2,
+ getShuffleVSHUFPDYImmediate(SVOp), DAG);
+
+ //===--------------------------------------------------------------------===//
+ // Since no target specific shuffle was selected for this generic one,
+ // lower it into other known shuffles. FIXME: this isn't true yet, but
+ // this is the plan.
+ //
// Handle v8i16 specifically since SSE can do byte extraction and insertion.
if (VT == MVT::v8i16) {
@@ -5989,9 +6834,14 @@ X86TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const {
return NewOp;
}
- // Handle all 4 wide cases with a number of shuffles.
- if (NumElems == 4)
- return LowerVECTOR_SHUFFLE_4wide(SVOp, DAG);
+ // Handle all 128-bit wide vectors with 4 elements, and match them with
+ // several different shuffle types.
+ if (NumElems == 4 && VT.getSizeInBits() == 128)
+ return LowerVECTOR_SHUFFLE_128v4(SVOp, DAG);
+
+ // Handle general 256-bit shuffles
+ if (VT.is256BitVector())
+ return LowerVECTOR_SHUFFLE_256(SVOp, DAG);
return SDValue();
}
@@ -6001,6 +6851,10 @@ X86TargetLowering::LowerEXTRACT_VECTOR_ELT_SSE4(SDValue Op,
SelectionDAG &DAG) const {
EVT VT = Op.getValueType();
DebugLoc dl = Op.getDebugLoc();
+
+ if (Op.getOperand(0).getValueType().getSizeInBits() != 128)
+ return SDValue();
+
if (VT.getSizeInBits() == 8) {
SDValue Extract = DAG.getNode(X86ISD::PEXTRB, dl, MVT::i32,
Op.getOperand(0), Op.getOperand(1));
@@ -6060,36 +6914,26 @@ X86TargetLowering::LowerEXTRACT_VECTOR_ELT(SDValue Op,
SDValue Vec = Op.getOperand(0);
EVT VecVT = Vec.getValueType();
- // If this is a 256-bit vector result, first extract the 128-bit
- // vector and then extract from the 128-bit vector.
- if (VecVT.getSizeInBits() > 128) {
+ // If this is a 256-bit vector result, first extract the 128-bit vector and
+ // then extract the element from the 128-bit vector.
+ if (VecVT.getSizeInBits() == 256) {
DebugLoc dl = Op.getNode()->getDebugLoc();
unsigned NumElems = VecVT.getVectorNumElements();
SDValue Idx = Op.getOperand(1);
-
- if (!isa<ConstantSDNode>(Idx))
- return SDValue();
-
- unsigned ExtractNumElems = NumElems / (VecVT.getSizeInBits() / 128);
unsigned IdxVal = cast<ConstantSDNode>(Idx)->getZExtValue();
// Get the 128-bit vector.
- bool Upper = IdxVal >= ExtractNumElems;
- Vec = Extract128BitVector(Vec, Idx, DAG, dl);
-
- // Extract from it.
- SDValue ScaledIdx = Idx;
- if (Upper)
- ScaledIdx = DAG.getNode(ISD::SUB, dl, Idx.getValueType(), Idx,
- DAG.getConstant(ExtractNumElems,
- Idx.getValueType()));
+ bool Upper = IdxVal >= NumElems/2;
+ Vec = Extract128BitVector(Vec,
+ DAG.getConstant(Upper ? NumElems/2 : 0, MVT::i32), DAG, dl);
+
return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, Op.getValueType(), Vec,
- ScaledIdx);
+ Upper ? DAG.getConstant(IdxVal-NumElems/2, MVT::i32) : Idx);
}
assert(Vec.getValueSizeInBits() <= 128 && "Unexpected vector length");
- if (Subtarget->hasSSE41()) {
+ if (Subtarget->hasSSE41() || Subtarget->hasAVX()) {
SDValue Res = LowerEXTRACT_VECTOR_ELT_SSE4(Op, DAG);
if (Res.getNode())
return Res;
@@ -6120,7 +6964,7 @@ X86TargetLowering::LowerEXTRACT_VECTOR_ELT(SDValue Op,
return Op;
// SHUFPS the element to the lowest double word, then movss.
- int Mask[4] = { Idx, -1, -1, -1 };
+ int Mask[4] = { static_cast<int>(Idx), -1, -1, -1 };
EVT VVT = Op.getOperand(0).getValueType();
SDValue Vec = DAG.getVectorShuffle(VVT, dl, Op.getOperand(0),
DAG.getUNDEF(VVT), Mask);
@@ -6159,6 +7003,9 @@ X86TargetLowering::LowerINSERT_VECTOR_ELT_SSE4(SDValue Op,
SDValue N1 = Op.getOperand(1);
SDValue N2 = Op.getOperand(2);
+ if (VT.getSizeInBits() == 256)
+ return SDValue();
+
if ((EltVT.getSizeInBits() == 8 || EltVT.getSizeInBits() == 16) &&
isa<ConstantSDNode>(N2)) {
unsigned Opc;
@@ -6206,35 +7053,28 @@ X86TargetLowering::LowerINSERT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const {
SDValue N1 = Op.getOperand(1);
SDValue N2 = Op.getOperand(2);
- // If this is a 256-bit vector result, first insert into a 128-bit
- // vector and then insert into the 256-bit vector.
- if (VT.getSizeInBits() > 128) {
+ // If this is a 256-bit vector result, first extract the 128-bit vector,
+ // insert the element into the extracted half and then place it back.
+ if (VT.getSizeInBits() == 256) {
if (!isa<ConstantSDNode>(N2))
return SDValue();
- // Get the 128-bit vector.
+ // Get the desired 128-bit vector half.
unsigned NumElems = VT.getVectorNumElements();
unsigned IdxVal = cast<ConstantSDNode>(N2)->getZExtValue();
- bool Upper = IdxVal >= NumElems / 2;
-
- SDValue SubN0 = Extract128BitVector(N0, N2, DAG, dl);
+ bool Upper = IdxVal >= NumElems/2;
+ SDValue Ins128Idx = DAG.getConstant(Upper ? NumElems/2 : 0, MVT::i32);
+ SDValue V = Extract128BitVector(N0, Ins128Idx, DAG, dl);
- // Insert into it.
- SDValue ScaledN2 = N2;
- if (Upper)
- ScaledN2 = DAG.getNode(ISD::SUB, dl, N2.getValueType(), N2,
- DAG.getConstant(NumElems /
- (VT.getSizeInBits() / 128),
- N2.getValueType()));
- Op = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, SubN0.getValueType(), SubN0,
- N1, ScaledN2);
+ // Insert the element into the desired half.
+ V = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, V.getValueType(), V,
+ N1, Upper ? DAG.getConstant(IdxVal-NumElems/2, MVT::i32) : N2);
- // Insert the 128-bit vector
- // FIXME: Why UNDEF?
- return Insert128BitVector(N0, Op, N2, DAG, dl);
+ // Insert the changed part back to the 256-bit vector
+ return Insert128BitVector(N0, V, Ins128Idx, DAG, dl);
}
- if (Subtarget->hasSSE41())
+ if (Subtarget->hasSSE41() || Subtarget->hasAVX())
return LowerINSERT_VECTOR_ELT_SSE4(Op, DAG);
if (EltVT == MVT::i8)
@@ -6405,12 +7245,17 @@ X86TargetLowering::LowerExternalSymbol(SDValue Op, SelectionDAG &DAG) const {
CodeModel::Model M = getTargetMachine().getCodeModel();
if (Subtarget->isPICStyleRIPRel() &&
- (M == CodeModel::Small || M == CodeModel::Kernel))
+ (M == CodeModel::Small || M == CodeModel::Kernel)) {
+ if (Subtarget->isTargetDarwin() || Subtarget->isTargetELF())
+ OpFlag = X86II::MO_GOTPCREL;
WrapperKind = X86ISD::WrapperRIP;
- else if (Subtarget->isPICStyleGOT())
- OpFlag = X86II::MO_GOTOFF;
- else if (Subtarget->isPICStyleStubPIC())
- OpFlag = X86II::MO_PIC_BASE_OFFSET;
+ } else if (Subtarget->isPICStyleGOT()) {
+ OpFlag = X86II::MO_GOT;
+ } else if (Subtarget->isPICStyleStubPIC()) {
+ OpFlag = X86II::MO_DARWIN_NONLAZY_PIC_BASE;
+ } else if (Subtarget->isPICStyleStubNoDynamic()) {
+ OpFlag = X86II::MO_DARWIN_NONLAZY;
+ }
SDValue Result = DAG.getTargetExternalSymbol(Sym, getPointerTy(), OpFlag);
@@ -6427,6 +7272,12 @@ X86TargetLowering::LowerExternalSymbol(SDValue Op, SelectionDAG &DAG) const {
Result);
}
+ // For symbols that require a load from a stub to get the address, emit the
+ // load.
+ if (isGlobalStubReference(OpFlag))
+ Result = DAG.getLoad(getPointerTy(), DL, DAG.getEntryNode(), Result,
+ MachinePointerInfo::getGOT(), false, false, 0);
+
return Result;
}
@@ -6676,7 +7527,8 @@ X86TargetLowering::LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const {
// And our return value (tls address) is in the standard call return value
// location.
unsigned Reg = Subtarget->is64Bit() ? X86::RAX : X86::EAX;
- return DAG.getCopyFromReg(Chain, DL, Reg, getPointerTy());
+ return DAG.getCopyFromReg(Chain, DL, Reg, getPointerTy(),
+ Chain.getValue(1));
}
assert(false &&
@@ -6922,9 +7774,11 @@ SDValue X86TargetLowering::LowerUINT_TO_FP_i32(SDValue Op,
// Load the 32-bit value into an XMM register.
SDValue Load = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v4i32,
- DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32,
- Op.getOperand(0),
- DAG.getIntPtrConstant(0)));
+ Op.getOperand(0));
+
+ // Zero out the upper parts of the register.
+ Load = getShuffleVectorZeroOrUndef(Load, 0, true, Subtarget->hasXMMInt(),
+ DAG);
Load = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f64,
DAG.getNode(ISD::BITCAST, dl, MVT::v2f64, Load),
@@ -7513,6 +8367,9 @@ SDValue X86TargetLowering::LowerToBT(SDValue And, ISD::CondCode CC,
}
SDValue X86TargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const {
+
+ if (Op.getValueType().isVector()) return LowerVSETCC(Op, DAG);
+
assert(Op.getValueType() == MVT::i8 && "SetCC type must be 8-bit integer");
SDValue Op0 = Op.getOperand(0);
SDValue Op1 = Op.getOperand(1);
@@ -7563,6 +8420,39 @@ SDValue X86TargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const {
DAG.getConstant(X86CC, MVT::i8), EFLAGS);
}
+// Lower256IntVSETCC - Break a VSETCC 256-bit integer VSETCC into two new 128
+// ones, and then concatenate the result back.
+static SDValue Lower256IntVSETCC(SDValue Op, SelectionDAG &DAG) {
+ EVT VT = Op.getValueType();
+
+ assert(VT.getSizeInBits() == 256 && Op.getOpcode() == ISD::SETCC &&
+ "Unsupported value type for operation");
+
+ int NumElems = VT.getVectorNumElements();
+ DebugLoc dl = Op.getDebugLoc();
+ SDValue CC = Op.getOperand(2);
+ SDValue Idx0 = DAG.getConstant(0, MVT::i32);
+ SDValue Idx1 = DAG.getConstant(NumElems/2, MVT::i32);
+
+ // Extract the LHS vectors
+ SDValue LHS = Op.getOperand(0);
+ SDValue LHS1 = Extract128BitVector(LHS, Idx0, DAG, dl);
+ SDValue LHS2 = Extract128BitVector(LHS, Idx1, DAG, dl);
+
+ // Extract the RHS vectors
+ SDValue RHS = Op.getOperand(1);
+ SDValue RHS1 = Extract128BitVector(RHS, Idx0, DAG, dl);
+ SDValue RHS2 = Extract128BitVector(RHS, Idx1, DAG, dl);
+
+ // Issue the operation on the smaller types and concatenate the result back
+ MVT EltVT = VT.getVectorElementType().getSimpleVT();
+ EVT NewVT = MVT::getVectorVT(EltVT, NumElems/2);
+ return DAG.getNode(ISD::CONCAT_VECTORS, dl, VT,
+ DAG.getNode(Op.getOpcode(), dl, NewVT, LHS1, RHS1, CC),
+ DAG.getNode(Op.getOpcode(), dl, NewVT, LHS2, RHS2, CC));
+}
+
+
SDValue X86TargetLowering::LowerVSETCC(SDValue Op, SelectionDAG &DAG) const {
SDValue Cond;
SDValue Op0 = Op.getOperand(0);
@@ -7575,11 +8465,21 @@ SDValue X86TargetLowering::LowerVSETCC(SDValue Op, SelectionDAG &DAG) const {
if (isFP) {
unsigned SSECC = 8;
- EVT VT0 = Op0.getValueType();
- assert(VT0 == MVT::v4f32 || VT0 == MVT::v2f64);
- unsigned Opc = VT0 == MVT::v4f32 ? X86ISD::CMPPS : X86ISD::CMPPD;
+ EVT EltVT = Op0.getValueType().getVectorElementType();
+ assert(EltVT == MVT::f32 || EltVT == MVT::f64);
+
+ unsigned Opc = EltVT == MVT::f32 ? X86ISD::CMPPS : X86ISD::CMPPD;
bool Swap = false;
+ // SSE Condition code mapping:
+ // 0 - EQ
+ // 1 - LT
+ // 2 - LE
+ // 3 - UNORD
+ // 4 - NEQ
+ // 5 - NLT
+ // 6 - NLE
+ // 7 - ORD
switch (SetCCOpcode) {
default: break;
case ISD::SETOEQ:
@@ -7624,6 +8524,10 @@ SDValue X86TargetLowering::LowerVSETCC(SDValue Op, SelectionDAG &DAG) const {
return DAG.getNode(Opc, dl, VT, Op0, Op1, DAG.getConstant(SSECC, MVT::i8));
}
+ // Break 256-bit integer vector compare into smaller ones.
+ if (!isFP && VT.getSizeInBits() == 256)
+ return Lower256IntVSETCC(Op, DAG);
+
// We are handling one of the integer comparisons here. Since SSE only has
// GT and EQ comparisons for integer, swapping operands and multiple
// operations may be required for some comparisons.
@@ -7654,6 +8558,13 @@ SDValue X86TargetLowering::LowerVSETCC(SDValue Op, SelectionDAG &DAG) const {
if (Swap)
std::swap(Op0, Op1);
+ // Check that the operation in question is available (most are plain SSE2,
+ // but PCMPGTQ and PCMPEQQ have different requirements).
+ if (Opc == X86ISD::PCMPGTQ && !Subtarget->hasSSE42() && !Subtarget->hasAVX())
+ return SDValue();
+ if (Opc == X86ISD::PCMPEQQ && !Subtarget->hasSSE41() && !Subtarget->hasAVX())
+ return SDValue();
+
// Since SSE has no unsigned integer comparisons, we need to flip the sign
// bits of the inputs before performing those operations.
if (FlipSigns) {
@@ -8014,9 +8925,11 @@ SDValue X86TargetLowering::LowerBRCOND(SDValue Op, SelectionDAG &DAG) const {
SDValue
X86TargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op,
SelectionDAG &DAG) const {
- assert((Subtarget->isTargetCygMing() || Subtarget->isTargetWindows()) &&
- "This should be used only on Windows targets");
- assert(!Subtarget->isTargetEnvMacho());
+ assert((Subtarget->isTargetCygMing() || Subtarget->isTargetWindows() ||
+ EnableSegmentedStacks) &&
+ "This should be used only on Windows targets or when segmented stacks "
+ "are being used");
+ assert(!Subtarget->isTargetEnvMacho() && "Not implemented");
DebugLoc dl = Op.getDebugLoc();
// Get the inputs.
@@ -8024,23 +8937,49 @@ X86TargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op,
SDValue Size = Op.getOperand(1);
// FIXME: Ensure alignment here
- SDValue Flag;
+ bool Is64Bit = Subtarget->is64Bit();
+ EVT SPTy = Is64Bit ? MVT::i64 : MVT::i32;
- EVT SPTy = Subtarget->is64Bit() ? MVT::i64 : MVT::i32;
- unsigned Reg = (Subtarget->is64Bit() ? X86::RAX : X86::EAX);
+ if (EnableSegmentedStacks) {
+ MachineFunction &MF = DAG.getMachineFunction();
+ MachineRegisterInfo &MRI = MF.getRegInfo();
- Chain = DAG.getCopyToReg(Chain, dl, Reg, Size, Flag);
- Flag = Chain.getValue(1);
+ if (Is64Bit) {
+ // The 64 bit implementation of segmented stacks needs to clobber both r10
+ // r11. This makes it impossible to use it along with nested parameters.
+ const Function *F = MF.getFunction();
+
+ for (Function::const_arg_iterator I = F->arg_begin(), E = F->arg_end();
+ I != E; I++)
+ if (I->hasNestAttr())
+ report_fatal_error("Cannot use segmented stacks with functions that "
+ "have nested arguments.");
+ }
+
+ const TargetRegisterClass *AddrRegClass =
+ getRegClassFor(Subtarget->is64Bit() ? MVT::i64:MVT::i32);
+ unsigned Vreg = MRI.createVirtualRegister(AddrRegClass);
+ Chain = DAG.getCopyToReg(Chain, dl, Vreg, Size);
+ SDValue Value = DAG.getNode(X86ISD::SEG_ALLOCA, dl, SPTy, Chain,
+ DAG.getRegister(Vreg, SPTy));
+ SDValue Ops1[2] = { Value, Chain };
+ return DAG.getMergeValues(Ops1, 2, dl);
+ } else {
+ SDValue Flag;
+ unsigned Reg = (Subtarget->is64Bit() ? X86::RAX : X86::EAX);
- SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
+ Chain = DAG.getCopyToReg(Chain, dl, Reg, Size, Flag);
+ Flag = Chain.getValue(1);
+ SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
- Chain = DAG.getNode(X86ISD::WIN_ALLOCA, dl, NodeTys, Chain, Flag);
- Flag = Chain.getValue(1);
+ Chain = DAG.getNode(X86ISD::WIN_ALLOCA, dl, NodeTys, Chain, Flag);
+ Flag = Chain.getValue(1);
- Chain = DAG.getCopyFromReg(Chain, dl, X86StackPtr, SPTy).getValue(1);
+ Chain = DAG.getCopyFromReg(Chain, dl, X86StackPtr, SPTy).getValue(1);
- SDValue Ops1[2] = { Chain.getValue(0), Chain };
- return DAG.getMergeValues(Ops1, 2, dl);
+ SDValue Ops1[2] = { Chain.getValue(0), Chain };
+ return DAG.getMergeValues(Ops1, 2, dl);
+ }
}
SDValue X86TargetLowering::LowerVASTART(SDValue Op, SelectionDAG &DAG) const {
@@ -8118,7 +9057,7 @@ SDValue X86TargetLowering::LowerVAARG(SDValue Op, SelectionDAG &DAG) const {
DebugLoc dl = Op.getDebugLoc();
EVT ArgVT = Op.getNode()->getValueType(0);
- const Type *ArgTy = ArgVT.getTypeForEVT(*DAG.getContext());
+ Type *ArgTy = ArgVT.getTypeForEVT(*DAG.getContext());
uint32_t ArgSize = getTargetData()->getTypeAllocSize(ArgTy);
uint8_t ArgMode;
@@ -8292,6 +9231,19 @@ X86TargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) const
DAG.getConstant(X86CC, MVT::i8), Cond);
return DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i32, SetCC);
}
+ // Arithmetic intrinsics.
+ case Intrinsic::x86_sse3_hadd_ps:
+ case Intrinsic::x86_sse3_hadd_pd:
+ case Intrinsic::x86_avx_hadd_ps_256:
+ case Intrinsic::x86_avx_hadd_pd_256:
+ return DAG.getNode(X86ISD::FHADD, dl, Op.getValueType(),
+ Op.getOperand(1), Op.getOperand(2));
+ case Intrinsic::x86_sse3_hsub_ps:
+ case Intrinsic::x86_sse3_hsub_pd:
+ case Intrinsic::x86_avx_hsub_ps_256:
+ case Intrinsic::x86_avx_hsub_pd_256:
+ return DAG.getNode(X86ISD::FHSUB, dl, Op.getValueType(),
+ Op.getOperand(1), Op.getOperand(2));
// ptest and testp intrinsics. The intrinsic these come from are designed to
// return an integer value, not just an instruction so lower it to the ptest
// or testp pattern and a setcc for the result.
@@ -8535,8 +9487,13 @@ SDValue X86TargetLowering::LowerEH_RETURN(SDValue Op, SelectionDAG &DAG) const {
Chain, DAG.getRegister(StoreAddrReg, getPointerTy()));
}
-SDValue X86TargetLowering::LowerTRAMPOLINE(SDValue Op,
- SelectionDAG &DAG) const {
+SDValue X86TargetLowering::LowerADJUST_TRAMPOLINE(SDValue Op,
+ SelectionDAG &DAG) const {
+ return Op.getOperand(0);
+}
+
+SDValue X86TargetLowering::LowerINIT_TRAMPOLINE(SDValue Op,
+ SelectionDAG &DAG) const {
SDValue Root = Op.getOperand(0);
SDValue Trmp = Op.getOperand(1); // trampoline
SDValue FPtr = Op.getOperand(2); // nested function
@@ -8552,8 +9509,8 @@ SDValue X86TargetLowering::LowerTRAMPOLINE(SDValue Op,
const unsigned char JMP64r = 0xFF; // 64-bit jmp through register opcode.
const unsigned char MOV64ri = 0xB8; // X86::MOV64ri opcode.
- const unsigned char N86R10 = RegInfo->getX86RegNum(X86::R10);
- const unsigned char N86R11 = RegInfo->getX86RegNum(X86::R11);
+ const unsigned char N86R10 = X86_MC::getX86RegNum(X86::R10);
+ const unsigned char N86R11 = X86_MC::getX86RegNum(X86::R11);
const unsigned char REX_WB = 0x40 | 0x08 | 0x01; // REX prefix
@@ -8600,9 +9557,7 @@ SDValue X86TargetLowering::LowerTRAMPOLINE(SDValue Op,
MachinePointerInfo(TrmpAddr, 22),
false, false, 0);
- SDValue Ops[] =
- { Trmp, DAG.getNode(ISD::TokenFactor, dl, MVT::Other, OutChains, 6) };
- return DAG.getMergeValues(Ops, 2, dl);
+ return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, OutChains, 6);
} else {
const Function *Func =
cast<Function>(cast<SrcValueSDNode>(Op.getOperand(5))->getValue());
@@ -8619,7 +9574,7 @@ SDValue X86TargetLowering::LowerTRAMPOLINE(SDValue Op,
NestReg = X86::ECX;
// Check that ECX wasn't needed by an 'inreg' parameter.
- const FunctionType *FTy = Func->getFunctionType();
+ FunctionType *FTy = Func->getFunctionType();
const AttrListPtr &Attrs = Func->getAttributes();
if (!Attrs.isEmpty() && !Func->isVarArg()) {
@@ -8657,7 +9612,7 @@ SDValue X86TargetLowering::LowerTRAMPOLINE(SDValue Op,
// This is storing the opcode for MOV32ri.
const unsigned char MOV32ri = 0xB8; // X86::MOV32ri's opcode byte.
- const unsigned char N86Reg = RegInfo->getX86RegNum(NestReg);
+ const unsigned char N86Reg = X86_MC::getX86RegNum(NestReg);
OutChains[0] = DAG.getStore(Root, dl,
DAG.getConstant(MOV32ri|N86Reg, MVT::i8),
Trmp, MachinePointerInfo(TrmpAddr),
@@ -8682,9 +9637,7 @@ SDValue X86TargetLowering::LowerTRAMPOLINE(SDValue Op,
MachinePointerInfo(TrmpAddr, 6),
false, false, 1);
- SDValue Ops[] =
- { Trmp, DAG.getNode(ISD::TokenFactor, dl, MVT::Other, OutChains, 4) };
- return DAG.getMergeValues(Ops, 2, dl);
+ return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, OutChains, 4);
}
}
@@ -8822,8 +9775,58 @@ SDValue X86TargetLowering::LowerCTTZ(SDValue Op, SelectionDAG &DAG) const {
return Op;
}
-SDValue X86TargetLowering::LowerMUL_V2I64(SDValue Op, SelectionDAG &DAG) const {
+// Lower256IntArith - Break a 256-bit integer operation into two new 128-bit
+// ones, and then concatenate the result back.
+static SDValue Lower256IntArith(SDValue Op, SelectionDAG &DAG) {
+ EVT VT = Op.getValueType();
+
+ assert(VT.getSizeInBits() == 256 && VT.isInteger() &&
+ "Unsupported value type for operation");
+
+ int NumElems = VT.getVectorNumElements();
+ DebugLoc dl = Op.getDebugLoc();
+ SDValue Idx0 = DAG.getConstant(0, MVT::i32);
+ SDValue Idx1 = DAG.getConstant(NumElems/2, MVT::i32);
+
+ // Extract the LHS vectors
+ SDValue LHS = Op.getOperand(0);
+ SDValue LHS1 = Extract128BitVector(LHS, Idx0, DAG, dl);
+ SDValue LHS2 = Extract128BitVector(LHS, Idx1, DAG, dl);
+
+ // Extract the RHS vectors
+ SDValue RHS = Op.getOperand(1);
+ SDValue RHS1 = Extract128BitVector(RHS, Idx0, DAG, dl);
+ SDValue RHS2 = Extract128BitVector(RHS, Idx1, DAG, dl);
+
+ MVT EltVT = VT.getVectorElementType().getSimpleVT();
+ EVT NewVT = MVT::getVectorVT(EltVT, NumElems/2);
+
+ return DAG.getNode(ISD::CONCAT_VECTORS, dl, VT,
+ DAG.getNode(Op.getOpcode(), dl, NewVT, LHS1, RHS1),
+ DAG.getNode(Op.getOpcode(), dl, NewVT, LHS2, RHS2));
+}
+
+SDValue X86TargetLowering::LowerADD(SDValue Op, SelectionDAG &DAG) const {
+ assert(Op.getValueType().getSizeInBits() == 256 &&
+ Op.getValueType().isInteger() &&
+ "Only handle AVX 256-bit vector integer operation");
+ return Lower256IntArith(Op, DAG);
+}
+
+SDValue X86TargetLowering::LowerSUB(SDValue Op, SelectionDAG &DAG) const {
+ assert(Op.getValueType().getSizeInBits() == 256 &&
+ Op.getValueType().isInteger() &&
+ "Only handle AVX 256-bit vector integer operation");
+ return Lower256IntArith(Op, DAG);
+}
+
+SDValue X86TargetLowering::LowerMUL(SDValue Op, SelectionDAG &DAG) const {
EVT VT = Op.getValueType();
+
+ // Decompose 256-bit ops into smaller 128-bit ops.
+ if (VT.getSizeInBits() == 256)
+ return Lower256IntArith(Op, DAG);
+
assert(VT == MVT::v2i64 && "Only know how to lower V2I64 multiply");
DebugLoc dl = Op.getDebugLoc();
@@ -8872,11 +9875,51 @@ SDValue X86TargetLowering::LowerShift(SDValue Op, SelectionDAG &DAG) const {
DebugLoc dl = Op.getDebugLoc();
SDValue R = Op.getOperand(0);
SDValue Amt = Op.getOperand(1);
-
LLVMContext *Context = DAG.getContext();
- // Must have SSE2.
- if (!Subtarget->hasSSE2()) return SDValue();
+ if (!Subtarget->hasXMMInt())
+ return SDValue();
+
+ // Decompose 256-bit shifts into smaller 128-bit shifts.
+ if (VT.getSizeInBits() == 256) {
+ int NumElems = VT.getVectorNumElements();
+ MVT EltVT = VT.getVectorElementType().getSimpleVT();
+ EVT NewVT = MVT::getVectorVT(EltVT, NumElems/2);
+
+ // Extract the two vectors
+ SDValue V1 = Extract128BitVector(R, DAG.getConstant(0, MVT::i32), DAG, dl);
+ SDValue V2 = Extract128BitVector(R, DAG.getConstant(NumElems/2, MVT::i32),
+ DAG, dl);
+
+ // Recreate the shift amount vectors
+ SDValue Amt1, Amt2;
+ if (Amt.getOpcode() == ISD::BUILD_VECTOR) {
+ // Constant shift amount
+ SmallVector<SDValue, 4> Amt1Csts;
+ SmallVector<SDValue, 4> Amt2Csts;
+ for (int i = 0; i < NumElems/2; ++i)
+ Amt1Csts.push_back(Amt->getOperand(i));
+ for (int i = NumElems/2; i < NumElems; ++i)
+ Amt2Csts.push_back(Amt->getOperand(i));
+
+ Amt1 = DAG.getNode(ISD::BUILD_VECTOR, dl, NewVT,
+ &Amt1Csts[0], NumElems/2);
+ Amt2 = DAG.getNode(ISD::BUILD_VECTOR, dl, NewVT,
+ &Amt2Csts[0], NumElems/2);
+ } else {
+ // Variable shift amount
+ Amt1 = Extract128BitVector(Amt, DAG.getConstant(0, MVT::i32), DAG, dl);
+ Amt2 = Extract128BitVector(Amt, DAG.getConstant(NumElems/2, MVT::i32),
+ DAG, dl);
+ }
+
+ // Issue new vector shifts for the smaller types
+ V1 = DAG.getNode(Op.getOpcode(), dl, NewVT, V1, Amt1);
+ V2 = DAG.getNode(Op.getOpcode(), dl, NewVT, V2, Amt2);
+
+ // Concatenate the result back
+ return DAG.getNode(ISD::CONCAT_VECTORS, dl, VT, V1, V2);
+ }
// Optimize shl/srl/sra with constant shift amount.
if (isSplatVector(Amt.getNode())) {
@@ -8927,9 +9970,6 @@ SDValue X86TargetLowering::LowerShift(SDValue Op, SelectionDAG &DAG) const {
}
// Lower SHL with variable shift amount.
- // Cannot lower SHL without SSE2 or later.
- if (!Subtarget->hasSSE2()) return SDValue();
-
if (VT == MVT::v4i32 && Op->getOpcode() == ISD::SHL) {
Op = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT,
DAG.getConstant(Intrinsic::x86_sse2_pslli_d, MVT::i32),
@@ -8971,7 +10011,7 @@ SDValue X86TargetLowering::LowerShift(SDValue Op, SelectionDAG &DAG) const {
M = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT,
DAG.getConstant(Intrinsic::x86_sse2_pslli_w, MVT::i32), M,
DAG.getConstant(4, MVT::i32));
- R = DAG.getNode(X86ISD::PBLENDVB, dl, VT, R, M, Op);
+ R = DAG.getNode(ISD::VSELECT, dl, VT, Op, R, M);
// a += a
Op = DAG.getNode(ISD::ADD, dl, VT, Op, Op);
@@ -8986,13 +10026,13 @@ SDValue X86TargetLowering::LowerShift(SDValue Op, SelectionDAG &DAG) const {
M = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT,
DAG.getConstant(Intrinsic::x86_sse2_pslli_w, MVT::i32), M,
DAG.getConstant(2, MVT::i32));
- R = DAG.getNode(X86ISD::PBLENDVB, dl, VT, R, M, Op);
+ R = DAG.getNode(ISD::VSELECT, dl, VT, Op, R, M);
// a += a
Op = DAG.getNode(ISD::ADD, dl, VT, Op, Op);
// return pblendv(r, r+r, a);
- R = DAG.getNode(X86ISD::PBLENDVB, dl, VT,
- R, DAG.getNode(ISD::ADD, dl, VT, R, R), Op);
+ R = DAG.getNode(ISD::VSELECT, dl, VT, Op,
+ R, DAG.getNode(ISD::ADD, dl, VT, R, R));
return R;
}
return SDValue();
@@ -9057,8 +10097,7 @@ SDValue X86TargetLowering::LowerXALUO(SDValue Op, SelectionDAG &DAG) const {
DAG.getConstant(X86::COND_O, MVT::i32),
SDValue(Sum.getNode(), 2));
- DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), SetCC);
- return Sum;
+ return DAG.getNode(ISD::MERGE_VALUES, DL, N->getVTList(), Sum, SetCC);
}
}
@@ -9071,8 +10110,7 @@ SDValue X86TargetLowering::LowerXALUO(SDValue Op, SelectionDAG &DAG) const {
DAG.getConstant(Cond, MVT::i32),
SDValue(Sum.getNode(), 1));
- DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), SetCC);
- return Sum;
+ return DAG.getNode(ISD::MERGE_VALUES, DL, N->getVTList(), Sum, SetCC);
}
SDValue X86TargetLowering::LowerSIGN_EXTEND_INREG(SDValue Op, SelectionDAG &DAG) const{
@@ -9080,8 +10118,7 @@ SDValue X86TargetLowering::LowerSIGN_EXTEND_INREG(SDValue Op, SelectionDAG &DAG)
SDNode* Node = Op.getNode();
EVT ExtraVT = cast<VTSDNode>(Node->getOperand(1))->getVT();
EVT VT = Node->getValueType(0);
-
- if (Subtarget->hasSSE2() && VT.isVector()) {
+ if (Subtarget->hasXMMInt() && VT.isVector()) {
unsigned BitsDiff = VT.getScalarType().getSizeInBits() -
ExtraVT.getScalarType().getSizeInBits();
SDValue ShAmt = DAG.getConstant(BitsDiff, MVT::i32);
@@ -9091,11 +10128,6 @@ SDValue X86TargetLowering::LowerSIGN_EXTEND_INREG(SDValue Op, SelectionDAG &DAG)
switch (VT.getSimpleVT().SimpleTy) {
default:
return SDValue();
- case MVT::v2i64: {
- SHLIntrinsicsID = Intrinsic::x86_sse2_pslli_q;
- SRAIntrinsicsID = 0;
- break;
- }
case MVT::v4i32: {
SHLIntrinsicsID = Intrinsic::x86_sse2_pslli_d;
SRAIntrinsicsID = Intrinsic::x86_sse2_psrai_d;
@@ -9115,12 +10147,9 @@ SDValue X86TargetLowering::LowerSIGN_EXTEND_INREG(SDValue Op, SelectionDAG &DAG)
// In case of 1 bit sext, no need to shr
if (ExtraVT.getScalarType().getSizeInBits() == 1) return Tmp1;
- if (SRAIntrinsicsID) {
- Tmp1 = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT,
- DAG.getConstant(SRAIntrinsicsID, MVT::i32),
- Tmp1, ShAmt);
- }
- return Tmp1;
+ return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT,
+ DAG.getConstant(SRAIntrinsicsID, MVT::i32),
+ Tmp1, ShAmt);
}
return SDValue();
@@ -9132,7 +10161,7 @@ SDValue X86TargetLowering::LowerMEMBARRIER(SDValue Op, SelectionDAG &DAG) const{
// Go ahead and emit the fence on x86-64 even if we asked for no-sse2.
// There isn't any reason to disable it if the target processor supports it.
- if (!Subtarget->hasSSE2() && !Subtarget->is64Bit()) {
+ if (!Subtarget->hasXMMInt() && !Subtarget->is64Bit()) {
SDValue Chain = Op.getOperand(0);
SDValue Zero = DAG.getConstant(0, MVT::i32);
SDValue Ops[] = {
@@ -9172,6 +10201,45 @@ SDValue X86TargetLowering::LowerMEMBARRIER(SDValue Op, SelectionDAG &DAG) const{
return DAG.getNode(X86ISD::MFENCE, dl, MVT::Other, Op.getOperand(0));
}
+SDValue X86TargetLowering::LowerATOMIC_FENCE(SDValue Op,
+ SelectionDAG &DAG) const {
+ DebugLoc dl = Op.getDebugLoc();
+ AtomicOrdering FenceOrdering = static_cast<AtomicOrdering>(
+ cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue());
+ SynchronizationScope FenceScope = static_cast<SynchronizationScope>(
+ cast<ConstantSDNode>(Op.getOperand(2))->getZExtValue());
+
+ // The only fence that needs an instruction is a sequentially-consistent
+ // cross-thread fence.
+ if (FenceOrdering == SequentiallyConsistent && FenceScope == CrossThread) {
+ // Use mfence if we have SSE2 or we're on x86-64 (even if we asked for
+ // no-sse2). There isn't any reason to disable it if the target processor
+ // supports it.
+ if (Subtarget->hasXMMInt() || Subtarget->is64Bit())
+ return DAG.getNode(X86ISD::MFENCE, dl, MVT::Other, Op.getOperand(0));
+
+ SDValue Chain = Op.getOperand(0);
+ SDValue Zero = DAG.getConstant(0, MVT::i32);
+ SDValue Ops[] = {
+ DAG.getRegister(X86::ESP, MVT::i32), // Base
+ DAG.getTargetConstant(1, MVT::i8), // Scale
+ DAG.getRegister(0, MVT::i32), // Index
+ DAG.getTargetConstant(0, MVT::i32), // Disp
+ DAG.getRegister(0, MVT::i32), // Segment.
+ Zero,
+ Chain
+ };
+ SDNode *Res =
+ DAG.getMachineNode(X86::OR32mrLocked, dl, MVT::Other, Ops,
+ array_lengthof(Ops));
+ return SDValue(Res, 0);
+ }
+
+ // MEMBARRIER is a compiler barrier; it codegens to a no-op.
+ return DAG.getNode(X86ISD::MEMBARRIER, dl, MVT::Other, Op.getOperand(0));
+}
+
+
SDValue X86TargetLowering::LowerCMP_SWAP(SDValue Op, SelectionDAG &DAG) const {
EVT T = Op.getValueType();
DebugLoc DL = Op.getDebugLoc();
@@ -9227,7 +10295,7 @@ SDValue X86TargetLowering::LowerBITCAST(SDValue Op,
SelectionDAG &DAG) const {
EVT SrcVT = Op.getOperand(0).getValueType();
EVT DstVT = Op.getValueType();
- assert(Subtarget->is64Bit() && !Subtarget->hasSSE2() &&
+ assert(Subtarget->is64Bit() && !Subtarget->hasXMMInt() &&
Subtarget->hasMMX() && "Unexpected custom BITCAST");
assert((DstVT == MVT::i64 ||
(DstVT.isVector() && DstVT.getSizeInBits()==64)) &&
@@ -9255,7 +10323,34 @@ SDValue X86TargetLowering::LowerLOAD_SUB(SDValue Op, SelectionDAG &DAG) const {
Node->getOperand(0),
Node->getOperand(1), negOp,
cast<AtomicSDNode>(Node)->getSrcValue(),
- cast<AtomicSDNode>(Node)->getAlignment());
+ cast<AtomicSDNode>(Node)->getAlignment(),
+ cast<AtomicSDNode>(Node)->getOrdering(),
+ cast<AtomicSDNode>(Node)->getSynchScope());
+}
+
+static SDValue LowerATOMIC_STORE(SDValue Op, SelectionDAG &DAG) {
+ SDNode *Node = Op.getNode();
+ DebugLoc dl = Node->getDebugLoc();
+ EVT VT = cast<AtomicSDNode>(Node)->getMemoryVT();
+
+ // Convert seq_cst store -> xchg
+ // Convert wide store -> swap (-> cmpxchg8b/cmpxchg16b)
+ // FIXME: On 32-bit, store -> fist or movq would be more efficient
+ // (The only way to get a 16-byte store is cmpxchg16b)
+ // FIXME: 16-byte ATOMIC_SWAP isn't actually hooked up at the moment.
+ if (cast<AtomicSDNode>(Node)->getOrdering() == SequentiallyConsistent ||
+ !DAG.getTargetLoweringInfo().isTypeLegal(VT)) {
+ SDValue Swap = DAG.getAtomic(ISD::ATOMIC_SWAP, dl,
+ cast<AtomicSDNode>(Node)->getMemoryVT(),
+ Node->getOperand(0),
+ Node->getOperand(1), Node->getOperand(2),
+ cast<AtomicSDNode>(Node)->getMemOperand(),
+ cast<AtomicSDNode>(Node)->getOrdering(),
+ cast<AtomicSDNode>(Node)->getSynchScope());
+ return Swap.getValue(1);
+ }
+ // Other atomic stores have a simple pattern.
+ return Op;
}
static SDValue LowerADDC_ADDE_SUBC_SUBE(SDValue Op, SelectionDAG &DAG) {
@@ -9291,8 +10386,10 @@ SDValue X86TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
default: llvm_unreachable("Should not custom lower this!");
case ISD::SIGN_EXTEND_INREG: return LowerSIGN_EXTEND_INREG(Op,DAG);
case ISD::MEMBARRIER: return LowerMEMBARRIER(Op,DAG);
+ case ISD::ATOMIC_FENCE: return LowerATOMIC_FENCE(Op,DAG);
case ISD::ATOMIC_CMP_SWAP: return LowerCMP_SWAP(Op,DAG);
case ISD::ATOMIC_LOAD_SUB: return LowerLOAD_SUB(Op,DAG);
+ case ISD::ATOMIC_STORE: return LowerATOMIC_STORE(Op,DAG);
case ISD::BUILD_VECTOR: return LowerBUILD_VECTOR(Op, DAG);
case ISD::CONCAT_VECTORS: return LowerCONCAT_VECTORS(Op, DAG);
case ISD::VECTOR_SHUFFLE: return LowerVECTOR_SHUFFLE(Op, DAG);
@@ -9318,7 +10415,6 @@ SDValue X86TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
case ISD::FCOPYSIGN: return LowerFCOPYSIGN(Op, DAG);
case ISD::FGETSIGN: return LowerFGETSIGN(Op, DAG);
case ISD::SETCC: return LowerSETCC(Op, DAG);
- case ISD::VSETCC: return LowerVSETCC(Op, DAG);
case ISD::SELECT: return LowerSELECT(Op, DAG);
case ISD::BRCOND: return LowerBRCOND(Op, DAG);
case ISD::JumpTable: return LowerJumpTable(Op, DAG);
@@ -9332,11 +10428,12 @@ SDValue X86TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
return LowerFRAME_TO_ARGS_OFFSET(Op, DAG);
case ISD::DYNAMIC_STACKALLOC: return LowerDYNAMIC_STACKALLOC(Op, DAG);
case ISD::EH_RETURN: return LowerEH_RETURN(Op, DAG);
- case ISD::TRAMPOLINE: return LowerTRAMPOLINE(Op, DAG);
+ case ISD::INIT_TRAMPOLINE: return LowerINIT_TRAMPOLINE(Op, DAG);
+ case ISD::ADJUST_TRAMPOLINE: return LowerADJUST_TRAMPOLINE(Op, DAG);
case ISD::FLT_ROUNDS_: return LowerFLT_ROUNDS_(Op, DAG);
case ISD::CTLZ: return LowerCTLZ(Op, DAG);
case ISD::CTTZ: return LowerCTTZ(Op, DAG);
- case ISD::MUL: return LowerMUL_V2I64(Op, DAG);
+ case ISD::MUL: return LowerMUL(Op, DAG);
case ISD::SRA:
case ISD::SRL:
case ISD::SHL: return LowerShift(Op, DAG);
@@ -9352,15 +10449,38 @@ SDValue X86TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
case ISD::ADDE:
case ISD::SUBC:
case ISD::SUBE: return LowerADDC_ADDE_SUBC_SUBE(Op, DAG);
+ case ISD::ADD: return LowerADD(Op, DAG);
+ case ISD::SUB: return LowerSUB(Op, DAG);
}
}
+static void ReplaceATOMIC_LOAD(SDNode *Node,
+ SmallVectorImpl<SDValue> &Results,
+ SelectionDAG &DAG) {
+ DebugLoc dl = Node->getDebugLoc();
+ EVT VT = cast<AtomicSDNode>(Node)->getMemoryVT();
+
+ // Convert wide load -> cmpxchg8b/cmpxchg16b
+ // FIXME: On 32-bit, load -> fild or movq would be more efficient
+ // (The only way to get a 16-byte load is cmpxchg16b)
+ // FIXME: 16-byte ATOMIC_CMP_SWAP isn't actually hooked up at the moment.
+ SDValue Zero = DAG.getConstant(0, VT);
+ SDValue Swap = DAG.getAtomic(ISD::ATOMIC_CMP_SWAP, dl, VT,
+ Node->getOperand(0),
+ Node->getOperand(1), Zero, Zero,
+ cast<AtomicSDNode>(Node)->getMemOperand(),
+ cast<AtomicSDNode>(Node)->getOrdering(),
+ cast<AtomicSDNode>(Node)->getSynchScope());
+ Results.push_back(Swap.getValue(0));
+ Results.push_back(Swap.getValue(1));
+}
+
void X86TargetLowering::
ReplaceATOMIC_BINARY_64(SDNode *Node, SmallVectorImpl<SDValue>&Results,
SelectionDAG &DAG, unsigned NewOp) const {
- EVT T = Node->getValueType(0);
DebugLoc dl = Node->getDebugLoc();
- assert (T == MVT::i64 && "Only know how to expand i64 atomics");
+ assert (Node->getValueType(0) == MVT::i64 &&
+ "Only know how to expand i64 atomics");
SDValue Chain = Node->getOperand(0);
SDValue In1 = Node->getOperand(1);
@@ -9423,37 +10543,48 @@ void X86TargetLowering::ReplaceNodeResults(SDNode *N,
}
case ISD::ATOMIC_CMP_SWAP: {
EVT T = N->getValueType(0);
- assert (T == MVT::i64 && "Only know how to expand i64 Cmp and Swap");
+ assert((T == MVT::i64 || T == MVT::i128) && "can only expand cmpxchg pair");
+ bool Regs64bit = T == MVT::i128;
+ EVT HalfT = Regs64bit ? MVT::i64 : MVT::i32;
SDValue cpInL, cpInH;
- cpInL = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32, N->getOperand(2),
- DAG.getConstant(0, MVT::i32));
- cpInH = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32, N->getOperand(2),
- DAG.getConstant(1, MVT::i32));
- cpInL = DAG.getCopyToReg(N->getOperand(0), dl, X86::EAX, cpInL, SDValue());
- cpInH = DAG.getCopyToReg(cpInL.getValue(0), dl, X86::EDX, cpInH,
- cpInL.getValue(1));
+ cpInL = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, HalfT, N->getOperand(2),
+ DAG.getConstant(0, HalfT));
+ cpInH = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, HalfT, N->getOperand(2),
+ DAG.getConstant(1, HalfT));
+ cpInL = DAG.getCopyToReg(N->getOperand(0), dl,
+ Regs64bit ? X86::RAX : X86::EAX,
+ cpInL, SDValue());
+ cpInH = DAG.getCopyToReg(cpInL.getValue(0), dl,
+ Regs64bit ? X86::RDX : X86::EDX,
+ cpInH, cpInL.getValue(1));
SDValue swapInL, swapInH;
- swapInL = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32, N->getOperand(3),
- DAG.getConstant(0, MVT::i32));
- swapInH = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32, N->getOperand(3),
- DAG.getConstant(1, MVT::i32));
- swapInL = DAG.getCopyToReg(cpInH.getValue(0), dl, X86::EBX, swapInL,
- cpInH.getValue(1));
- swapInH = DAG.getCopyToReg(swapInL.getValue(0), dl, X86::ECX, swapInH,
- swapInL.getValue(1));
+ swapInL = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, HalfT, N->getOperand(3),
+ DAG.getConstant(0, HalfT));
+ swapInH = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, HalfT, N->getOperand(3),
+ DAG.getConstant(1, HalfT));
+ swapInL = DAG.getCopyToReg(cpInH.getValue(0), dl,
+ Regs64bit ? X86::RBX : X86::EBX,
+ swapInL, cpInH.getValue(1));
+ swapInH = DAG.getCopyToReg(swapInL.getValue(0), dl,
+ Regs64bit ? X86::RCX : X86::ECX,
+ swapInH, swapInL.getValue(1));
SDValue Ops[] = { swapInH.getValue(0),
N->getOperand(1),
swapInH.getValue(1) };
SDVTList Tys = DAG.getVTList(MVT::Other, MVT::Glue);
MachineMemOperand *MMO = cast<AtomicSDNode>(N)->getMemOperand();
- SDValue Result = DAG.getMemIntrinsicNode(X86ISD::LCMPXCHG8_DAG, dl, Tys,
+ unsigned Opcode = Regs64bit ? X86ISD::LCMPXCHG16_DAG :
+ X86ISD::LCMPXCHG8_DAG;
+ SDValue Result = DAG.getMemIntrinsicNode(Opcode, dl, Tys,
Ops, 3, T, MMO);
- SDValue cpOutL = DAG.getCopyFromReg(Result.getValue(0), dl, X86::EAX,
- MVT::i32, Result.getValue(1));
- SDValue cpOutH = DAG.getCopyFromReg(cpOutL.getValue(1), dl, X86::EDX,
- MVT::i32, cpOutL.getValue(2));
+ SDValue cpOutL = DAG.getCopyFromReg(Result.getValue(0), dl,
+ Regs64bit ? X86::RAX : X86::EAX,
+ HalfT, Result.getValue(1));
+ SDValue cpOutH = DAG.getCopyFromReg(cpOutL.getValue(1), dl,
+ Regs64bit ? X86::RDX : X86::EDX,
+ HalfT, cpOutL.getValue(2));
SDValue OpsF[] = { cpOutL.getValue(0), cpOutH.getValue(0)};
- Results.push_back(DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, OpsF, 2));
+ Results.push_back(DAG.getNode(ISD::BUILD_PAIR, dl, T, OpsF, 2));
Results.push_back(cpOutH.getValue(1));
return;
}
@@ -9478,6 +10609,8 @@ void X86TargetLowering::ReplaceNodeResults(SDNode *N,
case ISD::ATOMIC_SWAP:
ReplaceATOMIC_BINARY_64(N, Results, DAG, X86ISD::ATOMSWAP64_DAG);
return;
+ case ISD::ATOMIC_LOAD:
+ ReplaceATOMIC_LOAD(N, Results, DAG);
}
}
@@ -9527,11 +10660,12 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const {
case X86ISD::PSIGNB: return "X86ISD::PSIGNB";
case X86ISD::PSIGNW: return "X86ISD::PSIGNW";
case X86ISD::PSIGND: return "X86ISD::PSIGND";
- case X86ISD::PBLENDVB: return "X86ISD::PBLENDVB";
case X86ISD::FMAX: return "X86ISD::FMAX";
case X86ISD::FMIN: return "X86ISD::FMIN";
case X86ISD::FRSQRT: return "X86ISD::FRSQRT";
case X86ISD::FRCP: return "X86ISD::FRCP";
+ case X86ISD::FHADD: return "X86ISD::FHADD";
+ case X86ISD::FHSUB: return "X86ISD::FHSUB";
case X86ISD::TLSADDR: return "X86ISD::TLSADDR";
case X86ISD::TLSCALL: return "X86ISD::TLSCALL";
case X86ISD::EH_RETURN: return "X86ISD::EH_RETURN";
@@ -9570,6 +10704,7 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const {
case X86ISD::OR: return "X86ISD::OR";
case X86ISD::XOR: return "X86ISD::XOR";
case X86ISD::AND: return "X86ISD::AND";
+ case X86ISD::ANDN: return "X86ISD::ANDN";
case X86ISD::MUL_IMM: return "X86ISD::MUL_IMM";
case X86ISD::PTEST: return "X86ISD::PTEST";
case X86ISD::TESTP: return "X86ISD::TESTP";
@@ -9596,9 +10731,6 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const {
case X86ISD::MOVSS: return "X86ISD::MOVSS";
case X86ISD::UNPCKLPS: return "X86ISD::UNPCKLPS";
case X86ISD::UNPCKLPD: return "X86ISD::UNPCKLPD";
- case X86ISD::VUNPCKLPS: return "X86ISD::VUNPCKLPS";
- case X86ISD::VUNPCKLPD: return "X86ISD::VUNPCKLPD";
- case X86ISD::VUNPCKLPSY: return "X86ISD::VUNPCKLPSY";
case X86ISD::VUNPCKLPDY: return "X86ISD::VUNPCKLPDY";
case X86ISD::UNPCKHPS: return "X86ISD::UNPCKHPS";
case X86ISD::UNPCKHPD: return "X86ISD::UNPCKHPD";
@@ -9610,16 +10742,24 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const {
case X86ISD::PUNPCKHWD: return "X86ISD::PUNPCKHWD";
case X86ISD::PUNPCKHDQ: return "X86ISD::PUNPCKHDQ";
case X86ISD::PUNPCKHQDQ: return "X86ISD::PUNPCKHQDQ";
+ case X86ISD::VBROADCAST: return "X86ISD::VBROADCAST";
+ case X86ISD::VPERMILPS: return "X86ISD::VPERMILPS";
+ case X86ISD::VPERMILPSY: return "X86ISD::VPERMILPSY";
+ case X86ISD::VPERMILPD: return "X86ISD::VPERMILPD";
+ case X86ISD::VPERMILPDY: return "X86ISD::VPERMILPDY";
+ case X86ISD::VPERM2F128: return "X86ISD::VPERM2F128";
case X86ISD::VASTART_SAVE_XMM_REGS: return "X86ISD::VASTART_SAVE_XMM_REGS";
case X86ISD::VAARG_64: return "X86ISD::VAARG_64";
case X86ISD::WIN_ALLOCA: return "X86ISD::WIN_ALLOCA";
+ case X86ISD::MEMBARRIER: return "X86ISD::MEMBARRIER";
+ case X86ISD::SEG_ALLOCA: return "X86ISD::SEG_ALLOCA";
}
}
// isLegalAddressingMode - Return true if the addressing mode represented
// by AM is legal for this target, for a load/store of the specified type.
bool X86TargetLowering::isLegalAddressingMode(const AddrMode &AM,
- const Type *Ty) const {
+ Type *Ty) const {
// X86 supports extremely general addressing modes.
CodeModel::Model M = getTargetMachine().getCodeModel();
Reloc::Model R = getTargetMachine().getRelocationModel();
@@ -9671,7 +10811,7 @@ bool X86TargetLowering::isLegalAddressingMode(const AddrMode &AM,
}
-bool X86TargetLowering::isTruncateFree(const Type *Ty1, const Type *Ty2) const {
+bool X86TargetLowering::isTruncateFree(Type *Ty1, Type *Ty2) const {
if (!Ty1->isIntegerTy() || !Ty2->isIntegerTy())
return false;
unsigned NumBits1 = Ty1->getPrimitiveSizeInBits();
@@ -9691,7 +10831,7 @@ bool X86TargetLowering::isTruncateFree(EVT VT1, EVT VT2) const {
return true;
}
-bool X86TargetLowering::isZExtFree(const Type *Ty1, const Type *Ty2) const {
+bool X86TargetLowering::isZExtFree(Type *Ty1, Type *Ty2) const {
// x86-64 implicitly zero-extends 32-bit results in 64-bit registers.
return Ty1->isIntegerTy(32) && Ty2->isIntegerTy(64) && Subtarget->is64Bit();
}
@@ -9715,7 +10855,7 @@ X86TargetLowering::isShuffleMaskLegal(const SmallVectorImpl<int> &M,
EVT VT) const {
// Very little shuffling can be done for 64-bit vectors right now.
if (VT.getSizeInBits() == 64)
- return isPALIGNRMask(M, VT, Subtarget->hasSSSE3());
+ return isPALIGNRMask(M, VT, Subtarget->hasSSSE3() || Subtarget->hasAVX());
// FIXME: pshufb, blends, shifts.
return (VT.getVectorNumElements() == 2 ||
@@ -9725,7 +10865,7 @@ X86TargetLowering::isShuffleMaskLegal(const SmallVectorImpl<int> &M,
isPSHUFDMask(M, VT) ||
isPSHUFHWMask(M, VT) ||
isPSHUFLWMask(M, VT) ||
- isPALIGNRMask(M, VT, Subtarget->hasSSSE3()) ||
+ isPALIGNRMask(M, VT, Subtarget->hasSSSE3() || Subtarget->hasAVX()) ||
isUNPCKLMask(M, VT) ||
isUNPCKHMask(M, VT) ||
isUNPCKL_v_undef_Mask(M, VT) ||
@@ -10158,7 +11298,9 @@ X86TargetLowering::EmitPCMP(MachineInstr *MI, MachineBasicBlock *BB,
if (!(Op.isReg() && Op.isImplicit()))
MIB.addOperand(Op);
}
- BuildMI(*BB, MI, dl, TII->get(X86::MOVAPSrr), MI->getOperand(0).getReg())
+ BuildMI(*BB, MI, dl,
+ TII->get(Subtarget->hasAVX() ? X86::VMOVAPSrr : X86::MOVAPSrr),
+ MI->getOperand(0).getReg())
.addReg(X86::XMM0);
MI->eraseFromParent();
@@ -10513,6 +11655,7 @@ X86TargetLowering::EmitVAStartSaveXMMRegsWithCustomInserter(
MBB->addSuccessor(EndMBB);
}
+ unsigned MOVOpc = Subtarget->hasAVX() ? X86::VMOVAPSmr : X86::MOVAPSmr;
// In the XMM save block, save all the XMM argument registers.
for (int i = 3, e = MI->getNumOperands(); i != e; ++i) {
int64_t Offset = (i - 3) * 16 + VarArgsFPOffset;
@@ -10521,7 +11664,7 @@ X86TargetLowering::EmitVAStartSaveXMMRegsWithCustomInserter(
MachinePointerInfo::getFixedStack(RegSaveFrameIndex, Offset),
MachineMemOperand::MOStore,
/*Size=*/16, /*Align=*/16);
- BuildMI(XMMSaveMBB, DL, TII->get(X86::MOVAPSmr))
+ BuildMI(XMMSaveMBB, DL, TII->get(MOVOpc))
.addFrameIndex(RegSaveFrameIndex)
.addImm(/*Scale=*/1)
.addReg(/*IndexReg=*/0)
@@ -10565,17 +11708,9 @@ X86TargetLowering::EmitLoweredSelect(MachineInstr *MI,
// If the EFLAGS register isn't dead in the terminator, then claim that it's
// live into the sink and copy blocks.
- const MachineFunction *MF = BB->getParent();
- const TargetRegisterInfo *TRI = MF->getTarget().getRegisterInfo();
- BitVector ReservedRegs = TRI->getReservedRegs(*MF);
-
- for (unsigned I = 0, E = MI->getNumOperands(); I != E; ++I) {
- const MachineOperand &MO = MI->getOperand(I);
- if (!MO.isReg() || !MO.isUse() || MO.isKill()) continue;
- unsigned Reg = MO.getReg();
- if (Reg != X86::EFLAGS) continue;
- copy0MBB->addLiveIn(Reg);
- sinkMBB->addLiveIn(Reg);
+ if (!MI->killsRegister(X86::EFLAGS)) {
+ copy0MBB->addLiveIn(X86::EFLAGS);
+ sinkMBB->addLiveIn(X86::EFLAGS);
}
// Transfer the remainder of BB and its successor edges to sinkMBB.
@@ -10611,6 +11746,119 @@ X86TargetLowering::EmitLoweredSelect(MachineInstr *MI,
}
MachineBasicBlock *
+X86TargetLowering::EmitLoweredSegAlloca(MachineInstr *MI, MachineBasicBlock *BB,
+ bool Is64Bit) const {
+ const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
+ DebugLoc DL = MI->getDebugLoc();
+ MachineFunction *MF = BB->getParent();
+ const BasicBlock *LLVM_BB = BB->getBasicBlock();
+
+ assert(EnableSegmentedStacks);
+
+ unsigned TlsReg = Is64Bit ? X86::FS : X86::GS;
+ unsigned TlsOffset = Is64Bit ? 0x70 : 0x30;
+
+ // BB:
+ // ... [Till the alloca]
+ // If stacklet is not large enough, jump to mallocMBB
+ //
+ // bumpMBB:
+ // Allocate by subtracting from RSP
+ // Jump to continueMBB
+ //
+ // mallocMBB:
+ // Allocate by call to runtime
+ //
+ // continueMBB:
+ // ...
+ // [rest of original BB]
+ //
+
+ MachineBasicBlock *mallocMBB = MF->CreateMachineBasicBlock(LLVM_BB);
+ MachineBasicBlock *bumpMBB = MF->CreateMachineBasicBlock(LLVM_BB);
+ MachineBasicBlock *continueMBB = MF->CreateMachineBasicBlock(LLVM_BB);
+
+ MachineRegisterInfo &MRI = MF->getRegInfo();
+ const TargetRegisterClass *AddrRegClass =
+ getRegClassFor(Is64Bit ? MVT::i64:MVT::i32);
+
+ unsigned mallocPtrVReg = MRI.createVirtualRegister(AddrRegClass),
+ bumpSPPtrVReg = MRI.createVirtualRegister(AddrRegClass),
+ tmpSPVReg = MRI.createVirtualRegister(AddrRegClass),
+ sizeVReg = MI->getOperand(1).getReg(),
+ physSPReg = Is64Bit ? X86::RSP : X86::ESP;
+
+ MachineFunction::iterator MBBIter = BB;
+ ++MBBIter;
+
+ MF->insert(MBBIter, bumpMBB);
+ MF->insert(MBBIter, mallocMBB);
+ MF->insert(MBBIter, continueMBB);
+
+ continueMBB->splice(continueMBB->begin(), BB, llvm::next
+ (MachineBasicBlock::iterator(MI)), BB->end());
+ continueMBB->transferSuccessorsAndUpdatePHIs(BB);
+
+ // Add code to the main basic block to check if the stack limit has been hit,
+ // and if so, jump to mallocMBB otherwise to bumpMBB.
+ BuildMI(BB, DL, TII->get(TargetOpcode::COPY), tmpSPVReg).addReg(physSPReg);
+ BuildMI(BB, DL, TII->get(Is64Bit ? X86::SUB64rr:X86::SUB32rr), tmpSPVReg)
+ .addReg(tmpSPVReg).addReg(sizeVReg);
+ BuildMI(BB, DL, TII->get(Is64Bit ? X86::CMP64mr:X86::CMP32mr))
+ .addReg(0).addImm(0).addReg(0).addImm(TlsOffset).addReg(TlsReg)
+ .addReg(tmpSPVReg);
+ BuildMI(BB, DL, TII->get(X86::JG_4)).addMBB(mallocMBB);
+
+ // bumpMBB simply decreases the stack pointer, since we know the current
+ // stacklet has enough space.
+ BuildMI(bumpMBB, DL, TII->get(TargetOpcode::COPY), physSPReg)
+ .addReg(tmpSPVReg);
+ BuildMI(bumpMBB, DL, TII->get(TargetOpcode::COPY), bumpSPPtrVReg)
+ .addReg(tmpSPVReg);
+ BuildMI(bumpMBB, DL, TII->get(X86::JMP_4)).addMBB(continueMBB);
+
+ // Calls into a routine in libgcc to allocate more space from the heap.
+ if (Is64Bit) {
+ BuildMI(mallocMBB, DL, TII->get(X86::MOV64rr), X86::RDI)
+ .addReg(sizeVReg);
+ BuildMI(mallocMBB, DL, TII->get(X86::CALL64pcrel32))
+ .addExternalSymbol("__morestack_allocate_stack_space").addReg(X86::RDI);
+ } else {
+ BuildMI(mallocMBB, DL, TII->get(X86::SUB32ri), physSPReg).addReg(physSPReg)
+ .addImm(12);
+ BuildMI(mallocMBB, DL, TII->get(X86::PUSH32r)).addReg(sizeVReg);
+ BuildMI(mallocMBB, DL, TII->get(X86::CALLpcrel32))
+ .addExternalSymbol("__morestack_allocate_stack_space");
+ }
+
+ if (!Is64Bit)
+ BuildMI(mallocMBB, DL, TII->get(X86::ADD32ri), physSPReg).addReg(physSPReg)
+ .addImm(16);
+
+ BuildMI(mallocMBB, DL, TII->get(TargetOpcode::COPY), mallocPtrVReg)
+ .addReg(Is64Bit ? X86::RAX : X86::EAX);
+ BuildMI(mallocMBB, DL, TII->get(X86::JMP_4)).addMBB(continueMBB);
+
+ // Set up the CFG correctly.
+ BB->addSuccessor(bumpMBB);
+ BB->addSuccessor(mallocMBB);
+ mallocMBB->addSuccessor(continueMBB);
+ bumpMBB->addSuccessor(continueMBB);
+
+ // Take care of the PHI nodes.
+ BuildMI(*continueMBB, continueMBB->begin(), DL, TII->get(X86::PHI),
+ MI->getOperand(0).getReg())
+ .addReg(mallocPtrVReg).addMBB(mallocMBB)
+ .addReg(bumpSPPtrVReg).addMBB(bumpMBB);
+
+ // Delete the original pseudo instruction.
+ MI->eraseFromParent();
+
+ // And we're done.
+ return continueMBB;
+}
+
+MachineBasicBlock *
X86TargetLowering::EmitLoweredWinAlloca(MachineInstr *MI,
MachineBasicBlock *BB) const {
const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
@@ -10718,11 +11966,11 @@ MachineBasicBlock *
X86TargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
MachineBasicBlock *BB) const {
switch (MI->getOpcode()) {
- default: assert(false && "Unexpected instr type to insert");
+ default: assert(0 && "Unexpected instr type to insert");
case X86::TAILJMPd64:
case X86::TAILJMPr64:
case X86::TAILJMPm64:
- assert(!"TAILJMP64 would not be touched here.");
+ assert(0 && "TAILJMP64 would not be touched here.");
case X86::TCRETURNdi64:
case X86::TCRETURNri64:
case X86::TCRETURNmi64:
@@ -10745,6 +11993,10 @@ X86TargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
return BB;
case X86::WIN_ALLOCA:
return EmitLoweredWinAlloca(MI, BB);
+ case X86::SEG_ALLOCA_32:
+ return EmitLoweredSegAlloca(MI, BB, false);
+ case X86::SEG_ALLOCA_64:
+ return EmitLoweredSegAlloca(MI, BB, true);
case X86::TLSCall_32:
case X86::TLSCall_64:
return EmitLoweredTLSCall(MI, BB);
@@ -10754,6 +12006,9 @@ X86TargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
case X86::CMOV_V4F32:
case X86::CMOV_V2F64:
case X86::CMOV_V2I64:
+ case X86::CMOV_V8F32:
+ case X86::CMOV_V4F64:
+ case X86::CMOV_V4I64:
case X86::CMOV_GR16:
case X86::CMOV_GR32:
case X86::CMOV_RFP32:
@@ -11074,6 +12329,33 @@ void X86TargetLowering::computeMaskedBitsForTargetNode(const SDValue Op,
KnownZero |= APInt::getHighBitsSet(Mask.getBitWidth(),
Mask.getBitWidth() - 1);
break;
+ case ISD::INTRINSIC_WO_CHAIN: {
+ unsigned IntId = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
+ unsigned NumLoBits = 0;
+ switch (IntId) {
+ default: break;
+ case Intrinsic::x86_sse_movmsk_ps:
+ case Intrinsic::x86_avx_movmsk_ps_256:
+ case Intrinsic::x86_sse2_movmsk_pd:
+ case Intrinsic::x86_avx_movmsk_pd_256:
+ case Intrinsic::x86_mmx_pmovmskb:
+ case Intrinsic::x86_sse2_pmovmskb_128: {
+ // High bits of movmskp{s|d}, pmovmskb are known zero.
+ switch (IntId) {
+ case Intrinsic::x86_sse_movmsk_ps: NumLoBits = 4; break;
+ case Intrinsic::x86_avx_movmsk_ps_256: NumLoBits = 8; break;
+ case Intrinsic::x86_sse2_movmsk_pd: NumLoBits = 2; break;
+ case Intrinsic::x86_avx_movmsk_pd_256: NumLoBits = 4; break;
+ case Intrinsic::x86_mmx_pmovmskb: NumLoBits = 8; break;
+ case Intrinsic::x86_sse2_pmovmskb_128: NumLoBits = 16; break;
+ }
+ KnownZero = APInt::getHighBitsSet(Mask.getBitWidth(),
+ Mask.getBitWidth() - NumLoBits);
+ break;
+ }
+ }
+ break;
+ }
}
}
@@ -11102,23 +12384,132 @@ bool X86TargetLowering::isGAPlusOffset(SDNode *N,
return TargetLowering::isGAPlusOffset(N, GA, Offset);
}
-/// PerformShuffleCombine - Combine a vector_shuffle that is equal to
-/// build_vector load1, load2, load3, load4, <0, 1, 2, 3> into a 128-bit load
-/// if the load addresses are consecutive, non-overlapping, and in the right
-/// order.
+/// isShuffleHigh128VectorInsertLow - Checks whether the shuffle node is the
+/// same as extracting the high 128-bit part of 256-bit vector and then
+/// inserting the result into the low part of a new 256-bit vector
+static bool isShuffleHigh128VectorInsertLow(ShuffleVectorSDNode *SVOp) {
+ EVT VT = SVOp->getValueType(0);
+ int NumElems = VT.getVectorNumElements();
+
+ // vector_shuffle <4, 5, 6, 7, u, u, u, u> or <2, 3, u, u>
+ for (int i = 0, j = NumElems/2; i < NumElems/2; ++i, ++j)
+ if (!isUndefOrEqual(SVOp->getMaskElt(i), j) ||
+ SVOp->getMaskElt(j) >= 0)
+ return false;
+
+ return true;
+}
+
+/// isShuffleLow128VectorInsertHigh - Checks whether the shuffle node is the
+/// same as extracting the low 128-bit part of 256-bit vector and then
+/// inserting the result into the high part of a new 256-bit vector
+static bool isShuffleLow128VectorInsertHigh(ShuffleVectorSDNode *SVOp) {
+ EVT VT = SVOp->getValueType(0);
+ int NumElems = VT.getVectorNumElements();
+
+ // vector_shuffle <u, u, u, u, 0, 1, 2, 3> or <u, u, 0, 1>
+ for (int i = NumElems/2, j = 0; i < NumElems; ++i, ++j)
+ if (!isUndefOrEqual(SVOp->getMaskElt(i), j) ||
+ SVOp->getMaskElt(j) >= 0)
+ return false;
+
+ return true;
+}
+
+/// PerformShuffleCombine256 - Performs shuffle combines for 256-bit vectors.
+static SDValue PerformShuffleCombine256(SDNode *N, SelectionDAG &DAG,
+ TargetLowering::DAGCombinerInfo &DCI) {
+ DebugLoc dl = N->getDebugLoc();
+ ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(N);
+ SDValue V1 = SVOp->getOperand(0);
+ SDValue V2 = SVOp->getOperand(1);
+ EVT VT = SVOp->getValueType(0);
+ int NumElems = VT.getVectorNumElements();
+
+ if (V1.getOpcode() == ISD::CONCAT_VECTORS &&
+ V2.getOpcode() == ISD::CONCAT_VECTORS) {
+ //
+ // 0,0,0,...
+ // |
+ // V UNDEF BUILD_VECTOR UNDEF
+ // \ / \ /
+ // CONCAT_VECTOR CONCAT_VECTOR
+ // \ /
+ // \ /
+ // RESULT: V + zero extended
+ //
+ if (V2.getOperand(0).getOpcode() != ISD::BUILD_VECTOR ||
+ V2.getOperand(1).getOpcode() != ISD::UNDEF ||
+ V1.getOperand(1).getOpcode() != ISD::UNDEF)
+ return SDValue();
+
+ if (!ISD::isBuildVectorAllZeros(V2.getOperand(0).getNode()))
+ return SDValue();
+
+ // To match the shuffle mask, the first half of the mask should
+ // be exactly the first vector, and all the rest a splat with the
+ // first element of the second one.
+ for (int i = 0; i < NumElems/2; ++i)
+ if (!isUndefOrEqual(SVOp->getMaskElt(i), i) ||
+ !isUndefOrEqual(SVOp->getMaskElt(i+NumElems/2), NumElems))
+ return SDValue();
+
+ // Emit a zeroed vector and insert the desired subvector on its
+ // first half.
+ SDValue Zeros = getZeroVector(VT, true /* HasXMMInt */, DAG, dl);
+ SDValue InsV = Insert128BitVector(Zeros, V1.getOperand(0),
+ DAG.getConstant(0, MVT::i32), DAG, dl);
+ return DCI.CombineTo(N, InsV);
+ }
+
+ //===--------------------------------------------------------------------===//
+ // Combine some shuffles into subvector extracts and inserts:
+ //
+
+ // vector_shuffle <4, 5, 6, 7, u, u, u, u> or <2, 3, u, u>
+ if (isShuffleHigh128VectorInsertLow(SVOp)) {
+ SDValue V = Extract128BitVector(V1, DAG.getConstant(NumElems/2, MVT::i32),
+ DAG, dl);
+ SDValue InsV = Insert128BitVector(DAG.getNode(ISD::UNDEF, dl, VT),
+ V, DAG.getConstant(0, MVT::i32), DAG, dl);
+ return DCI.CombineTo(N, InsV);
+ }
+
+ // vector_shuffle <u, u, u, u, 0, 1, 2, 3> or <u, u, 0, 1>
+ if (isShuffleLow128VectorInsertHigh(SVOp)) {
+ SDValue V = Extract128BitVector(V1, DAG.getConstant(0, MVT::i32), DAG, dl);
+ SDValue InsV = Insert128BitVector(DAG.getNode(ISD::UNDEF, dl, VT),
+ V, DAG.getConstant(NumElems/2, MVT::i32), DAG, dl);
+ return DCI.CombineTo(N, InsV);
+ }
+
+ return SDValue();
+}
+
+/// PerformShuffleCombine - Performs several different shuffle combines.
static SDValue PerformShuffleCombine(SDNode *N, SelectionDAG &DAG,
- TargetLowering::DAGCombinerInfo &DCI) {
+ TargetLowering::DAGCombinerInfo &DCI,
+ const X86Subtarget *Subtarget) {
DebugLoc dl = N->getDebugLoc();
EVT VT = N->getValueType(0);
- if (VT.getSizeInBits() != 128)
- return SDValue();
-
// Don't create instructions with illegal types after legalize types has run.
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
if (!DCI.isBeforeLegalize() && !TLI.isTypeLegal(VT.getVectorElementType()))
return SDValue();
+ // Combine 256-bit vector shuffles. This is only profitable when in AVX mode
+ if (Subtarget->hasAVX() && VT.getSizeInBits() == 256 &&
+ N->getOpcode() == ISD::VECTOR_SHUFFLE)
+ return PerformShuffleCombine256(N, DAG, DCI);
+
+ // Only handle 128 wide vector from here on.
+ if (VT.getSizeInBits() != 128)
+ return SDValue();
+
+ // Combine a vector_shuffle that is equal to build_vector load1, load2, load3,
+ // load4, <0, 1, 2, 3> into a 128-bit load if the load addresses are
+ // consecutive, non-overlapping, and in the right order.
SmallVector<SDValue, 16> Elts;
for (unsigned i = 0, e = VT.getVectorNumElements(); i != e; ++i)
Elts.push_back(getShuffleScalarElt(N, i, DAG, 0));
@@ -11209,7 +12600,8 @@ static SDValue PerformEXTRACT_VECTOR_ELTCombine(SDNode *N, SelectionDAG &DAG,
return SDValue();
}
-/// PerformSELECTCombine - Do target-specific dag combines on SELECT nodes.
+/// PerformSELECTCombine - Do target-specific dag combines on SELECT and VSELECT
+/// nodes.
static SDValue PerformSELECTCombine(SDNode *N, SelectionDAG &DAG,
const X86Subtarget *Subtarget) {
DebugLoc DL = N->getDebugLoc();
@@ -11217,14 +12609,16 @@ static SDValue PerformSELECTCombine(SDNode *N, SelectionDAG &DAG,
// Get the LHS/RHS of the select.
SDValue LHS = N->getOperand(1);
SDValue RHS = N->getOperand(2);
+ EVT VT = LHS.getValueType();
// If we have SSE[12] support, try to form min/max nodes. SSE min/max
// instructions match the semantics of the common C idiom x<y?x:y but not
// x<=y?x:y, because of how they handle negative zero (which can be
// ignored in unsafe-math mode).
- if (Subtarget->hasSSE2() &&
- (LHS.getValueType() == MVT::f32 || LHS.getValueType() == MVT::f64) &&
- Cond.getOpcode() == ISD::SETCC) {
+ if (Cond.getOpcode() == ISD::SETCC && VT.isFloatingPoint() &&
+ VT != MVT::f80 && DAG.getTargetLoweringInfo().isTypeLegal(VT) &&
+ (Subtarget->hasXMMInt() ||
+ (Subtarget->hasSSE1() && VT.getScalarType() == MVT::f32))) {
ISD::CondCode CC = cast<CondCodeSDNode>(Cond.getOperand(2))->get();
unsigned Opcode = 0;
@@ -11267,7 +12661,7 @@ static SDValue PerformSELECTCombine(SDNode *N, SelectionDAG &DAG,
// Converting this to a max would handle comparisons between positive
// and negative zero incorrectly.
if (!UnsafeFPMath &&
- !DAG.isKnownNeverZero(LHS) && !DAG.isKnownNeverZero(LHS))
+ !DAG.isKnownNeverZero(LHS) && !DAG.isKnownNeverZero(RHS))
break;
Opcode = X86ISD::FMAX;
break;
@@ -11680,7 +13074,7 @@ static SDValue PerformShiftCombine(SDNode* N, SelectionDAG &DAG,
// all elements are shifted by the same amount. We can't do this in legalize
// because the a constant vector is typically transformed to a constant pool
// so we have no knowledge of the shift amount.
- if (!Subtarget->hasSSE2())
+ if (!Subtarget->hasXMMInt())
return SDValue();
if (VT != MVT::v2i64 && VT != MVT::v4i32 && VT != MVT::v8i16)
@@ -11796,7 +13190,7 @@ static SDValue CMPEQCombine(SDNode *N, SelectionDAG &DAG,
// SSE1 supports CMP{eq|ne}SS, and SSE2 added CMP{eq|ne}SD, but
// we're requiring SSE2 for both.
- if (Subtarget->hasSSE2() && isAndOrOfSetCCs(SDValue(N, 0U), opcode)) {
+ if (Subtarget->hasXMMInt() && isAndOrOfSetCCs(SDValue(N, 0U), opcode)) {
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
SDValue CMP0 = N0->getOperand(1);
@@ -11864,6 +13258,36 @@ static SDValue CMPEQCombine(SDNode *N, SelectionDAG &DAG,
return SDValue();
}
+/// CanFoldXORWithAllOnes - Test whether the XOR operand is a AllOnes vector
+/// so it can be folded inside ANDNP.
+static bool CanFoldXORWithAllOnes(const SDNode *N) {
+ EVT VT = N->getValueType(0);
+
+ // Match direct AllOnes for 128 and 256-bit vectors
+ if (ISD::isBuildVectorAllOnes(N))
+ return true;
+
+ // Look through a bit convert.
+ if (N->getOpcode() == ISD::BITCAST)
+ N = N->getOperand(0).getNode();
+
+ // Sometimes the operand may come from a insert_subvector building a 256-bit
+ // allones vector
+ if (VT.getSizeInBits() == 256 &&
+ N->getOpcode() == ISD::INSERT_SUBVECTOR) {
+ SDValue V1 = N->getOperand(0);
+ SDValue V2 = N->getOperand(1);
+
+ if (V1.getOpcode() == ISD::INSERT_SUBVECTOR &&
+ V1.getOperand(0).getOpcode() == ISD::UNDEF &&
+ ISD::isBuildVectorAllOnes(V1.getOperand(1).getNode()) &&
+ ISD::isBuildVectorAllOnes(V2.getNode()))
+ return true;
+ }
+
+ return false;
+}
+
static SDValue PerformAndCombine(SDNode *N, SelectionDAG &DAG,
TargetLowering::DAGCombinerInfo &DCI,
const X86Subtarget *Subtarget) {
@@ -11874,11 +13298,28 @@ static SDValue PerformAndCombine(SDNode *N, SelectionDAG &DAG,
if (R.getNode())
return R;
+ EVT VT = N->getValueType(0);
+
+ // Create ANDN instructions
+ if (Subtarget->hasBMI() && (VT == MVT::i32 || VT == MVT::i64)) {
+ SDValue N0 = N->getOperand(0);
+ SDValue N1 = N->getOperand(1);
+ DebugLoc DL = N->getDebugLoc();
+
+ // Check LHS for not
+ if (N0.getOpcode() == ISD::XOR && isAllOnes(N0.getOperand(1)))
+ return DAG.getNode(X86ISD::ANDN, DL, VT, N0.getOperand(0), N1);
+ // Check RHS for not
+ if (N1.getOpcode() == ISD::XOR && isAllOnes(N1.getOperand(1)))
+ return DAG.getNode(X86ISD::ANDN, DL, VT, N1.getOperand(0), N0);
+
+ return SDValue();
+ }
+
// Want to form ANDNP nodes:
// 1) In the hopes of then easily combining them with OR and AND nodes
// to form PBLEND/PSIGN.
// 2) To match ANDN packed intrinsics
- EVT VT = N->getValueType(0);
if (VT != MVT::v2i64 && VT != MVT::v4i64)
return SDValue();
@@ -11888,12 +13329,14 @@ static SDValue PerformAndCombine(SDNode *N, SelectionDAG &DAG,
// Check LHS for vnot
if (N0.getOpcode() == ISD::XOR &&
- ISD::isBuildVectorAllOnes(N0.getOperand(1).getNode()))
+ //ISD::isBuildVectorAllOnes(N0.getOperand(1).getNode()))
+ CanFoldXORWithAllOnes(N0.getOperand(1).getNode()))
return DAG.getNode(X86ISD::ANDNP, DL, VT, N0.getOperand(0), N1);
// Check RHS for vnot
if (N1.getOpcode() == ISD::XOR &&
- ISD::isBuildVectorAllOnes(N1.getOperand(1).getNode()))
+ //ISD::isBuildVectorAllOnes(N1.getOperand(1).getNode()))
+ CanFoldXORWithAllOnes(N1.getOperand(1).getNode()))
return DAG.getNode(X86ISD::ANDNP, DL, VT, N1.getOperand(0), N0);
return SDValue();
@@ -11917,7 +13360,7 @@ static SDValue PerformOrCombine(SDNode *N, SelectionDAG &DAG,
SDValue N1 = N->getOperand(1);
// look for psign/blend
- if (Subtarget->hasSSSE3()) {
+ if (Subtarget->hasSSSE3() || Subtarget->hasAVX()) {
if (VT == MVT::v2i64) {
// Canonicalize pandn to RHS
if (N0.getOpcode() == X86ISD::ANDNP)
@@ -11990,13 +13433,13 @@ static SDValue PerformOrCombine(SDNode *N, SelectionDAG &DAG,
}
}
// PBLENDVB only available on SSE 4.1
- if (!Subtarget->hasSSE41())
+ if (!(Subtarget->hasSSE41() || Subtarget->hasAVX()))
return SDValue();
X = DAG.getNode(ISD::BITCAST, DL, MVT::v16i8, X);
Y = DAG.getNode(ISD::BITCAST, DL, MVT::v16i8, Y);
Mask = DAG.getNode(ISD::BITCAST, DL, MVT::v16i8, Mask);
- Mask = DAG.getNode(X86ISD::PBLENDVB, DL, MVT::v16i8, X, Y, Mask);
+ Mask = DAG.getNode(ISD::VSELECT, DL, MVT::v16i8, Mask, X, Y);
return DAG.getNode(ISD::BITCAST, DL, MVT::v2i64, Mask);
}
}
@@ -12057,24 +13500,211 @@ static SDValue PerformOrCombine(SDNode *N, SelectionDAG &DAG,
return SDValue();
}
+/// PerformLOADCombine - Do target-specific dag combines on LOAD nodes.
+static SDValue PerformLOADCombine(SDNode *N, SelectionDAG &DAG,
+ const X86Subtarget *Subtarget) {
+ LoadSDNode *Ld = cast<LoadSDNode>(N);
+ EVT RegVT = Ld->getValueType(0);
+ EVT MemVT = Ld->getMemoryVT();
+ DebugLoc dl = Ld->getDebugLoc();
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+
+ ISD::LoadExtType Ext = Ld->getExtensionType();
+
+ // If this is a vector EXT Load then attempt to optimize it using a
+ // shuffle. We need SSE4 for the shuffles.
+ // TODO: It is possible to support ZExt by zeroing the undef values
+ // during the shuffle phase or after the shuffle.
+ if (RegVT.isVector() && Ext == ISD::EXTLOAD && Subtarget->hasSSE41()) {
+ assert(MemVT != RegVT && "Cannot extend to the same type");
+ assert(MemVT.isVector() && "Must load a vector from memory");
+
+ unsigned NumElems = RegVT.getVectorNumElements();
+ unsigned RegSz = RegVT.getSizeInBits();
+ unsigned MemSz = MemVT.getSizeInBits();
+ assert(RegSz > MemSz && "Register size must be greater than the mem size");
+ // All sizes must be a power of two
+ if (!isPowerOf2_32(RegSz * MemSz * NumElems)) return SDValue();
+
+ // Attempt to load the original value using a single load op.
+ // Find a scalar type which is equal to the loaded word size.
+ MVT SclrLoadTy = MVT::i8;
+ for (unsigned tp = MVT::FIRST_INTEGER_VALUETYPE;
+ tp < MVT::LAST_INTEGER_VALUETYPE; ++tp) {
+ MVT Tp = (MVT::SimpleValueType)tp;
+ if (TLI.isTypeLegal(Tp) && Tp.getSizeInBits() == MemSz) {
+ SclrLoadTy = Tp;
+ break;
+ }
+ }
+
+ // Proceed if a load word is found.
+ if (SclrLoadTy.getSizeInBits() != MemSz) return SDValue();
+
+ EVT LoadUnitVecVT = EVT::getVectorVT(*DAG.getContext(), SclrLoadTy,
+ RegSz/SclrLoadTy.getSizeInBits());
+
+ EVT WideVecVT = EVT::getVectorVT(*DAG.getContext(), MemVT.getScalarType(),
+ RegSz/MemVT.getScalarType().getSizeInBits());
+ // Can't shuffle using an illegal type.
+ if (!TLI.isTypeLegal(WideVecVT)) return SDValue();
+
+ // Perform a single load.
+ SDValue ScalarLoad = DAG.getLoad(SclrLoadTy, dl, Ld->getChain(),
+ Ld->getBasePtr(),
+ Ld->getPointerInfo(), Ld->isVolatile(),
+ Ld->isNonTemporal(), Ld->getAlignment());
+
+ // Insert the word loaded into a vector.
+ SDValue ScalarInVector = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl,
+ LoadUnitVecVT, ScalarLoad);
+
+ // Bitcast the loaded value to a vector of the original element type, in
+ // the size of the target vector type.
+ SDValue SlicedVec = DAG.getNode(ISD::BITCAST, dl, WideVecVT, ScalarInVector);
+ unsigned SizeRatio = RegSz/MemSz;
+
+ // Redistribute the loaded elements into the different locations.
+ SmallVector<int, 8> ShuffleVec(NumElems * SizeRatio, -1);
+ for (unsigned i = 0; i < NumElems; i++) ShuffleVec[i*SizeRatio] = i;
+
+ SDValue Shuff = DAG.getVectorShuffle(WideVecVT, dl, SlicedVec,
+ DAG.getUNDEF(SlicedVec.getValueType()),
+ ShuffleVec.data());
+
+ // Bitcast to the requested type.
+ Shuff = DAG.getNode(ISD::BITCAST, dl, RegVT, Shuff);
+ // Replace the original load with the new sequence
+ // and return the new chain.
+ DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Shuff);
+ return SDValue(ScalarLoad.getNode(), 1);
+ }
+
+ return SDValue();
+}
+
/// PerformSTORECombine - Do target-specific dag combines on STORE nodes.
static SDValue PerformSTORECombine(SDNode *N, SelectionDAG &DAG,
const X86Subtarget *Subtarget) {
+ StoreSDNode *St = cast<StoreSDNode>(N);
+ EVT VT = St->getValue().getValueType();
+ EVT StVT = St->getMemoryVT();
+ DebugLoc dl = St->getDebugLoc();
+ SDValue StoredVal = St->getOperand(1);
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+
+ // If we are saving a concatination of two XMM registers, perform two stores.
+ // This is better in Sandy Bridge cause one 256-bit mem op is done via two
+ // 128-bit ones. If in the future the cost becomes only one memory access the
+ // first version would be better.
+ if (VT.getSizeInBits() == 256 &&
+ StoredVal.getNode()->getOpcode() == ISD::CONCAT_VECTORS &&
+ StoredVal.getNumOperands() == 2) {
+
+ SDValue Value0 = StoredVal.getOperand(0);
+ SDValue Value1 = StoredVal.getOperand(1);
+
+ SDValue Stride = DAG.getConstant(16, TLI.getPointerTy());
+ SDValue Ptr0 = St->getBasePtr();
+ SDValue Ptr1 = DAG.getNode(ISD::ADD, dl, Ptr0.getValueType(), Ptr0, Stride);
+
+ SDValue Ch0 = DAG.getStore(St->getChain(), dl, Value0, Ptr0,
+ St->getPointerInfo(), St->isVolatile(),
+ St->isNonTemporal(), St->getAlignment());
+ SDValue Ch1 = DAG.getStore(St->getChain(), dl, Value1, Ptr1,
+ St->getPointerInfo(), St->isVolatile(),
+ St->isNonTemporal(), St->getAlignment());
+ return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Ch0, Ch1);
+ }
+
+ // Optimize trunc store (of multiple scalars) to shuffle and store.
+ // First, pack all of the elements in one place. Next, store to memory
+ // in fewer chunks.
+ if (St->isTruncatingStore() && VT.isVector()) {
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+ unsigned NumElems = VT.getVectorNumElements();
+ assert(StVT != VT && "Cannot truncate to the same type");
+ unsigned FromSz = VT.getVectorElementType().getSizeInBits();
+ unsigned ToSz = StVT.getVectorElementType().getSizeInBits();
+
+ // From, To sizes and ElemCount must be pow of two
+ if (!isPowerOf2_32(NumElems * FromSz * ToSz)) return SDValue();
+ // We are going to use the original vector elt for storing.
+ // Accumulated smaller vector elements must be a multiple of the store size.
+ if (0 != (NumElems * FromSz) % ToSz) return SDValue();
+
+ unsigned SizeRatio = FromSz / ToSz;
+
+ assert(SizeRatio * NumElems * ToSz == VT.getSizeInBits());
+
+ // Create a type on which we perform the shuffle
+ EVT WideVecVT = EVT::getVectorVT(*DAG.getContext(),
+ StVT.getScalarType(), NumElems*SizeRatio);
+
+ assert(WideVecVT.getSizeInBits() == VT.getSizeInBits());
+
+ SDValue WideVec = DAG.getNode(ISD::BITCAST, dl, WideVecVT, St->getValue());
+ SmallVector<int, 8> ShuffleVec(NumElems * SizeRatio, -1);
+ for (unsigned i = 0; i < NumElems; i++ ) ShuffleVec[i] = i * SizeRatio;
+
+ // Can't shuffle using an illegal type
+ if (!TLI.isTypeLegal(WideVecVT)) return SDValue();
+
+ SDValue Shuff = DAG.getVectorShuffle(WideVecVT, dl, WideVec,
+ DAG.getUNDEF(WideVec.getValueType()),
+ ShuffleVec.data());
+ // At this point all of the data is stored at the bottom of the
+ // register. We now need to save it to mem.
+
+ // Find the largest store unit
+ MVT StoreType = MVT::i8;
+ for (unsigned tp = MVT::FIRST_INTEGER_VALUETYPE;
+ tp < MVT::LAST_INTEGER_VALUETYPE; ++tp) {
+ MVT Tp = (MVT::SimpleValueType)tp;
+ if (TLI.isTypeLegal(Tp) && StoreType.getSizeInBits() < NumElems * ToSz)
+ StoreType = Tp;
+ }
+
+ // Bitcast the original vector into a vector of store-size units
+ EVT StoreVecVT = EVT::getVectorVT(*DAG.getContext(),
+ StoreType, VT.getSizeInBits()/EVT(StoreType).getSizeInBits());
+ assert(StoreVecVT.getSizeInBits() == VT.getSizeInBits());
+ SDValue ShuffWide = DAG.getNode(ISD::BITCAST, dl, StoreVecVT, Shuff);
+ SmallVector<SDValue, 8> Chains;
+ SDValue Increment = DAG.getConstant(StoreType.getSizeInBits()/8,
+ TLI.getPointerTy());
+ SDValue Ptr = St->getBasePtr();
+
+ // Perform one or more big stores into memory.
+ for (unsigned i = 0; i < (ToSz*NumElems)/StoreType.getSizeInBits() ; i++) {
+ SDValue SubVec = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl,
+ StoreType, ShuffWide,
+ DAG.getIntPtrConstant(i));
+ SDValue Ch = DAG.getStore(St->getChain(), dl, SubVec, Ptr,
+ St->getPointerInfo(), St->isVolatile(),
+ St->isNonTemporal(), St->getAlignment());
+ Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr, Increment);
+ Chains.push_back(Ch);
+ }
+
+ return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, &Chains[0],
+ Chains.size());
+ }
+
+
// Turn load->store of MMX types into GPR load/stores. This avoids clobbering
// the FP state in cases where an emms may be missing.
// A preferable solution to the general problem is to figure out the right
// places to insert EMMS. This qualifies as a quick hack.
// Similarly, turn load->store of i64 into double load/stores in 32-bit mode.
- StoreSDNode *St = cast<StoreSDNode>(N);
- EVT VT = St->getValue().getValueType();
if (VT.getSizeInBits() != 64)
return SDValue();
const Function *F = DAG.getMachineFunction().getFunction();
bool NoImplicitFloatOps = F->hasFnAttr(Attribute::NoImplicitFloat);
bool F64IsLegal = !UseSoftFloat && !NoImplicitFloatOps
- && Subtarget->hasSSE2();
+ && Subtarget->hasXMMInt();
if ((VT.isVector() ||
(VT == MVT::i64 && F64IsLegal && !Subtarget->is64Bit())) &&
isa<LoadSDNode>(St->getValue()) &&
@@ -12172,6 +13802,150 @@ static SDValue PerformSTORECombine(SDNode *N, SelectionDAG &DAG,
return SDValue();
}
+/// isHorizontalBinOp - Return 'true' if this vector operation is "horizontal"
+/// and return the operands for the horizontal operation in LHS and RHS. A
+/// horizontal operation performs the binary operation on successive elements
+/// of its first operand, then on successive elements of its second operand,
+/// returning the resulting values in a vector. For example, if
+/// A = < float a0, float a1, float a2, float a3 >
+/// and
+/// B = < float b0, float b1, float b2, float b3 >
+/// then the result of doing a horizontal operation on A and B is
+/// A horizontal-op B = < a0 op a1, a2 op a3, b0 op b1, b2 op b3 >.
+/// In short, LHS and RHS are inspected to see if LHS op RHS is of the form
+/// A horizontal-op B, for some already available A and B, and if so then LHS is
+/// set to A, RHS to B, and the routine returns 'true'.
+/// Note that the binary operation should have the property that if one of the
+/// operands is UNDEF then the result is UNDEF.
+static bool isHorizontalBinOp(SDValue &LHS, SDValue &RHS, bool isCommutative) {
+ // Look for the following pattern: if
+ // A = < float a0, float a1, float a2, float a3 >
+ // B = < float b0, float b1, float b2, float b3 >
+ // and
+ // LHS = VECTOR_SHUFFLE A, B, <0, 2, 4, 6>
+ // RHS = VECTOR_SHUFFLE A, B, <1, 3, 5, 7>
+ // then LHS op RHS = < a0 op a1, a2 op a3, b0 op b1, b2 op b3 >
+ // which is A horizontal-op B.
+
+ // At least one of the operands should be a vector shuffle.
+ if (LHS.getOpcode() != ISD::VECTOR_SHUFFLE &&
+ RHS.getOpcode() != ISD::VECTOR_SHUFFLE)
+ return false;
+
+ EVT VT = LHS.getValueType();
+ unsigned N = VT.getVectorNumElements();
+
+ // View LHS in the form
+ // LHS = VECTOR_SHUFFLE A, B, LMask
+ // If LHS is not a shuffle then pretend it is the shuffle
+ // LHS = VECTOR_SHUFFLE LHS, undef, <0, 1, ..., N-1>
+ // NOTE: in what follows a default initialized SDValue represents an UNDEF of
+ // type VT.
+ SDValue A, B;
+ SmallVector<int, 8> LMask(N);
+ if (LHS.getOpcode() == ISD::VECTOR_SHUFFLE) {
+ if (LHS.getOperand(0).getOpcode() != ISD::UNDEF)
+ A = LHS.getOperand(0);
+ if (LHS.getOperand(1).getOpcode() != ISD::UNDEF)
+ B = LHS.getOperand(1);
+ cast<ShuffleVectorSDNode>(LHS.getNode())->getMask(LMask);
+ } else {
+ if (LHS.getOpcode() != ISD::UNDEF)
+ A = LHS;
+ for (unsigned i = 0; i != N; ++i)
+ LMask[i] = i;
+ }
+
+ // Likewise, view RHS in the form
+ // RHS = VECTOR_SHUFFLE C, D, RMask
+ SDValue C, D;
+ SmallVector<int, 8> RMask(N);
+ if (RHS.getOpcode() == ISD::VECTOR_SHUFFLE) {
+ if (RHS.getOperand(0).getOpcode() != ISD::UNDEF)
+ C = RHS.getOperand(0);
+ if (RHS.getOperand(1).getOpcode() != ISD::UNDEF)
+ D = RHS.getOperand(1);
+ cast<ShuffleVectorSDNode>(RHS.getNode())->getMask(RMask);
+ } else {
+ if (RHS.getOpcode() != ISD::UNDEF)
+ C = RHS;
+ for (unsigned i = 0; i != N; ++i)
+ RMask[i] = i;
+ }
+
+ // Check that the shuffles are both shuffling the same vectors.
+ if (!(A == C && B == D) && !(A == D && B == C))
+ return false;
+
+ // If everything is UNDEF then bail out: it would be better to fold to UNDEF.
+ if (!A.getNode() && !B.getNode())
+ return false;
+
+ // If A and B occur in reverse order in RHS, then "swap" them (which means
+ // rewriting the mask).
+ if (A != C)
+ for (unsigned i = 0; i != N; ++i) {
+ unsigned Idx = RMask[i];
+ if (Idx < N)
+ RMask[i] += N;
+ else if (Idx < 2*N)
+ RMask[i] -= N;
+ }
+
+ // At this point LHS and RHS are equivalent to
+ // LHS = VECTOR_SHUFFLE A, B, LMask
+ // RHS = VECTOR_SHUFFLE A, B, RMask
+ // Check that the masks correspond to performing a horizontal operation.
+ for (unsigned i = 0; i != N; ++i) {
+ unsigned LIdx = LMask[i], RIdx = RMask[i];
+
+ // Ignore any UNDEF components.
+ if (LIdx >= 2*N || RIdx >= 2*N || (!A.getNode() && (LIdx < N || RIdx < N))
+ || (!B.getNode() && (LIdx >= N || RIdx >= N)))
+ continue;
+
+ // Check that successive elements are being operated on. If not, this is
+ // not a horizontal operation.
+ if (!(LIdx == 2*i && RIdx == 2*i + 1) &&
+ !(isCommutative && LIdx == 2*i + 1 && RIdx == 2*i))
+ return false;
+ }
+
+ LHS = A.getNode() ? A : B; // If A is 'UNDEF', use B for it.
+ RHS = B.getNode() ? B : A; // If B is 'UNDEF', use A for it.
+ return true;
+}
+
+/// PerformFADDCombine - Do target-specific dag combines on floating point adds.
+static SDValue PerformFADDCombine(SDNode *N, SelectionDAG &DAG,
+ const X86Subtarget *Subtarget) {
+ EVT VT = N->getValueType(0);
+ SDValue LHS = N->getOperand(0);
+ SDValue RHS = N->getOperand(1);
+
+ // Try to synthesize horizontal adds from adds of shuffles.
+ if ((Subtarget->hasSSE3() || Subtarget->hasAVX()) &&
+ (VT == MVT::v4f32 || VT == MVT::v2f64) &&
+ isHorizontalBinOp(LHS, RHS, true))
+ return DAG.getNode(X86ISD::FHADD, N->getDebugLoc(), VT, LHS, RHS);
+ return SDValue();
+}
+
+/// PerformFSUBCombine - Do target-specific dag combines on floating point subs.
+static SDValue PerformFSUBCombine(SDNode *N, SelectionDAG &DAG,
+ const X86Subtarget *Subtarget) {
+ EVT VT = N->getValueType(0);
+ SDValue LHS = N->getOperand(0);
+ SDValue RHS = N->getOperand(1);
+
+ // Try to synthesize horizontal subs from subs of shuffles.
+ if ((Subtarget->hasSSE3() || Subtarget->hasAVX()) &&
+ (VT == MVT::v4f32 || VT == MVT::v2f64) &&
+ isHorizontalBinOp(LHS, RHS, false))
+ return DAG.getNode(X86ISD::FHSUB, N->getDebugLoc(), VT, LHS, RHS);
+ return SDValue();
+}
+
/// PerformFORCombine - Do target-specific dag combines on X86ISD::FOR and
/// X86ISD::FXOR nodes.
static SDValue PerformFORCombine(SDNode *N, SelectionDAG &DAG) {
@@ -12326,7 +14100,7 @@ static SDValue PerformADCCombine(SDNode *N, SelectionDAG &DAG,
// (add Y, (setne X, 0)) -> sbb -1, Y
// (sub (sete X, 0), Y) -> sbb 0, Y
// (sub (setne X, 0), Y) -> adc -1, Y
-static SDValue OptimizeConditonalInDecrement(SDNode *N, SelectionDAG &DAG) {
+static SDValue OptimizeConditionalInDecrement(SDNode *N, SelectionDAG &DAG) {
DebugLoc DL = N->getDebugLoc();
// Look through ZExts.
@@ -12362,6 +14136,31 @@ static SDValue OptimizeConditonalInDecrement(SDNode *N, SelectionDAG &DAG) {
DAG.getConstant(0, OtherVal.getValueType()), NewCmp);
}
+static SDValue PerformSubCombine(SDNode *N, SelectionDAG &DAG) {
+ SDValue Op0 = N->getOperand(0);
+ SDValue Op1 = N->getOperand(1);
+
+ // X86 can't encode an immediate LHS of a sub. See if we can push the
+ // negation into a preceding instruction.
+ if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op0)) {
+ // If the RHS of the sub is a XOR with one use and a constant, invert the
+ // immediate. Then add one to the LHS of the sub so we can turn
+ // X-Y -> X+~Y+1, saving one register.
+ if (Op1->hasOneUse() && Op1.getOpcode() == ISD::XOR &&
+ isa<ConstantSDNode>(Op1.getOperand(1))) {
+ APInt XorC = cast<ConstantSDNode>(Op1.getOperand(1))->getAPIntValue();
+ EVT VT = Op0.getValueType();
+ SDValue NewXor = DAG.getNode(ISD::XOR, Op1.getDebugLoc(), VT,
+ Op1.getOperand(0),
+ DAG.getConstant(~XorC, VT));
+ return DAG.getNode(ISD::ADD, N->getDebugLoc(), VT, NewXor,
+ DAG.getConstant(C->getAPIntValue()+1, VT));
+ }
+ }
+
+ return OptimizeConditionalInDecrement(N, DAG);
+}
+
SDValue X86TargetLowering::PerformDAGCombine(SDNode *N,
DAGCombinerInfo &DCI) const {
SelectionDAG &DAG = DCI.DAG;
@@ -12369,10 +14168,11 @@ SDValue X86TargetLowering::PerformDAGCombine(SDNode *N,
default: break;
case ISD::EXTRACT_VECTOR_ELT:
return PerformEXTRACT_VECTOR_ELTCombine(N, DAG, *this);
+ case ISD::VSELECT:
case ISD::SELECT: return PerformSELECTCombine(N, DAG, Subtarget);
case X86ISD::CMOV: return PerformCMOVCombine(N, DAG, DCI);
- case ISD::ADD:
- case ISD::SUB: return OptimizeConditonalInDecrement(N, DAG);
+ case ISD::ADD: return OptimizeConditionalInDecrement(N, DAG);
+ case ISD::SUB: return PerformSubCombine(N, DAG);
case X86ISD::ADC: return PerformADCCombine(N, DAG, DCI);
case ISD::MUL: return PerformMulCombine(N, DAG, DCI);
case ISD::SHL:
@@ -12380,8 +14180,11 @@ SDValue X86TargetLowering::PerformDAGCombine(SDNode *N,
case ISD::SRL: return PerformShiftCombine(N, DAG, Subtarget);
case ISD::AND: return PerformAndCombine(N, DAG, DCI, Subtarget);
case ISD::OR: return PerformOrCombine(N, DAG, DCI, Subtarget);
+ case ISD::LOAD: return PerformLOADCombine(N, DAG, Subtarget);
case ISD::STORE: return PerformSTORECombine(N, DAG, Subtarget);
case ISD::SINT_TO_FP: return PerformSINT_TO_FPCombine(N, DAG, this);
+ case ISD::FADD: return PerformFADDCombine(N, DAG, Subtarget);
+ case ISD::FSUB: return PerformFSUBCombine(N, DAG, Subtarget);
case X86ISD::FXOR:
case X86ISD::FOR: return PerformFORCombine(N, DAG);
case X86ISD::FAND: return PerformFANDCombine(N, DAG);
@@ -12398,14 +14201,14 @@ SDValue X86TargetLowering::PerformDAGCombine(SDNode *N,
case X86ISD::PUNPCKHQDQ:
case X86ISD::UNPCKHPS:
case X86ISD::UNPCKHPD:
+ case X86ISD::VUNPCKHPSY:
+ case X86ISD::VUNPCKHPDY:
case X86ISD::PUNPCKLBW:
case X86ISD::PUNPCKLWD:
case X86ISD::PUNPCKLDQ:
case X86ISD::PUNPCKLQDQ:
case X86ISD::UNPCKLPS:
case X86ISD::UNPCKLPD:
- case X86ISD::VUNPCKLPS:
- case X86ISD::VUNPCKLPD:
case X86ISD::VUNPCKLPSY:
case X86ISD::VUNPCKLPDY:
case X86ISD::MOVHLPS:
@@ -12415,7 +14218,12 @@ SDValue X86TargetLowering::PerformDAGCombine(SDNode *N,
case X86ISD::PSHUFLW:
case X86ISD::MOVSS:
case X86ISD::MOVSD:
- case ISD::VECTOR_SHUFFLE: return PerformShuffleCombine(N, DAG, DCI);
+ case X86ISD::VPERMILPS:
+ case X86ISD::VPERMILPSY:
+ case X86ISD::VPERMILPD:
+ case X86ISD::VPERMILPDY:
+ case X86ISD::VPERM2F128:
+ case ISD::VECTOR_SHUFFLE: return PerformShuffleCombine(N, DAG, DCI,Subtarget);
}
return SDValue();
@@ -12551,7 +14359,7 @@ bool X86TargetLowering::ExpandInlineAsm(CallInst *CI) const {
AsmPieces[1] == "${0:q}")) {
// No need to check constraints, nothing other than the equivalent of
// "=r,0" would be valid here.
- const IntegerType *Ty = dyn_cast<IntegerType>(CI->getType());
+ IntegerType *Ty = dyn_cast<IntegerType>(CI->getType());
if (!Ty || Ty->getBitWidth() % 16 != 0)
return false;
return IntrinsicLowering::LowerToByteSwap(CI);
@@ -12572,7 +14380,7 @@ bool X86TargetLowering::ExpandInlineAsm(CallInst *CI) const {
AsmPieces[1] == "~{dirflag}" &&
AsmPieces[2] == "~{flags}" &&
AsmPieces[3] == "~{fpsr}") {
- const IntegerType *Ty = dyn_cast<IntegerType>(CI->getType());
+ IntegerType *Ty = dyn_cast<IntegerType>(CI->getType());
if (!Ty || Ty->getBitWidth() % 16 != 0)
return false;
return IntrinsicLowering::LowerToByteSwap(CI);
@@ -12603,7 +14411,7 @@ bool X86TargetLowering::ExpandInlineAsm(CallInst *CI) const {
AsmPieces[1] == "~{dirflag}" &&
AsmPieces[2] == "~{flags}" &&
AsmPieces[3] == "~{fpsr}") {
- const IntegerType *Ty = dyn_cast<IntegerType>(CI->getType());
+ IntegerType *Ty = dyn_cast<IntegerType>(CI->getType());
if (!Ty || Ty->getBitWidth() % 16 != 0)
return false;
return IntrinsicLowering::LowerToByteSwap(CI);
@@ -12629,7 +14437,7 @@ bool X86TargetLowering::ExpandInlineAsm(CallInst *CI) const {
SplitString(AsmPieces[2], Words, " \t,");
if (Words.size() == 3 && Words[0] == "xchgl" && Words[1] == "%eax" &&
Words[2] == "%edx") {
- const IntegerType *Ty = dyn_cast<IntegerType>(CI->getType());
+ IntegerType *Ty = dyn_cast<IntegerType>(CI->getType());
if (!Ty || Ty->getBitWidth() % 16 != 0)
return false;
return IntrinsicLowering::LowerToByteSwap(CI);
@@ -12700,7 +14508,7 @@ TargetLowering::ConstraintWeight
// but allow it at the lowest weight.
if (CallOperandVal == NULL)
return CW_Default;
- const Type *type = CallOperandVal->getType();
+ Type *type = CallOperandVal->getType();
// Look at the constraint type.
switch (*constraint) {
default:
diff --git a/contrib/llvm/lib/Target/X86/X86ISelLowering.h b/contrib/llvm/lib/Target/X86/X86ISelLowering.h
index b603678..342a5e6 100644
--- a/contrib/llvm/lib/Target/X86/X86ISelLowering.h
+++ b/contrib/llvm/lib/Target/X86/X86ISelLowering.h
@@ -175,8 +175,14 @@ namespace llvm {
/// PSIGNB/W/D - Copy integer sign.
PSIGNB, PSIGNW, PSIGND,
- /// PBLENDVB - Variable blend
- PBLENDVB,
+ /// BLEND family of opcodes
+ BLENDV,
+
+ /// FHADD - Floating point horizontal add.
+ FHADD,
+
+ /// FHSUB - Floating point horizontal sub.
+ FHSUB,
/// FMAX, FMIN - Floating point max and min.
///
@@ -222,6 +228,8 @@ namespace llvm {
ADD, SUB, ADC, SBB, SMUL,
INC, DEC, OR, XOR, AND,
+ ANDN, // ANDN - Bitwise AND NOT with FLAGS results.
+
UMUL, // LOW, HI, FLAGS = umul LHS, RHS
// MUL_IMM - X86 specific multiply by immediate.
@@ -257,12 +265,12 @@ namespace llvm {
MOVSS,
UNPCKLPS,
UNPCKLPD,
- VUNPCKLPS,
- VUNPCKLPD,
VUNPCKLPSY,
VUNPCKLPDY,
UNPCKHPS,
UNPCKHPD,
+ VUNPCKHPSY,
+ VUNPCKHPDY,
PUNPCKLBW,
PUNPCKLWD,
PUNPCKLDQ,
@@ -271,6 +279,12 @@ namespace llvm {
PUNPCKHWD,
PUNPCKHDQ,
PUNPCKHQDQ,
+ VPERMILPS,
+ VPERMILPSY,
+ VPERMILPD,
+ VPERMILPDY,
+ VPERM2F128,
+ VBROADCAST,
// VASTART_SAVE_XMM_REGS - Save xmm argument registers to the stack,
// according to %al. An operator is needed so that this can be expanded
@@ -280,6 +294,11 @@ namespace llvm {
// WIN_ALLOCA - Windows's _chkstk call to do stack probing.
WIN_ALLOCA,
+ // SEG_ALLOCA - For allocating variable amounts of stack space when using
+ // segmented stacks. Check if the current stacklet has enough space, and
+ // falls back to heap allocation if not.
+ SEG_ALLOCA,
+
// Memory barrier
MEMBARRIER,
MFENCE,
@@ -297,9 +316,10 @@ namespace llvm {
ATOMNAND64_DAG,
ATOMSWAP64_DAG,
- // LCMPXCHG_DAG, LCMPXCHG8_DAG - Compare and swap.
+ // LCMPXCHG_DAG, LCMPXCHG8_DAG, LCMPXCHG16_DAG - Compare and swap.
LCMPXCHG_DAG,
LCMPXCHG8_DAG,
+ LCMPXCHG16_DAG,
// VZEXT_LOAD - Load, scalar_to_vector, and zero extend.
VZEXT_LOAD,
@@ -407,20 +427,16 @@ namespace llvm {
/// isMOVSHDUPMask - Return true if the specified VECTOR_SHUFFLE operand
/// specifies a shuffle of elements that is suitable for input to MOVSHDUP.
- bool isMOVSHDUPMask(ShuffleVectorSDNode *N);
+ bool isMOVSHDUPMask(ShuffleVectorSDNode *N, const X86Subtarget *Subtarget);
/// isMOVSLDUPMask - Return true if the specified VECTOR_SHUFFLE operand
/// specifies a shuffle of elements that is suitable for input to MOVSLDUP.
- bool isMOVSLDUPMask(ShuffleVectorSDNode *N);
+ bool isMOVSLDUPMask(ShuffleVectorSDNode *N, const X86Subtarget *Subtarget);
/// isMOVDDUPMask - Return true if the specified VECTOR_SHUFFLE operand
/// specifies a shuffle of elements that is suitable for input to MOVDDUP.
bool isMOVDDUPMask(ShuffleVectorSDNode *N);
- /// isPALIGNRMask - Return true if the specified VECTOR_SHUFFLE operand
- /// specifies a shuffle of elements that is suitable for input to PALIGNR.
- bool isPALIGNRMask(ShuffleVectorSDNode *N);
-
/// isVEXTRACTF128Index - Return true if the specified
/// EXTRACT_SUBVECTOR operand specifies a vector extract that is
/// suitable for input to VEXTRACTF128.
@@ -505,7 +521,7 @@ namespace llvm {
/// function arguments in the caller parameter area. For X86, aggregates
/// that contains are placed at 16-byte boundaries while the rest are at
/// 4-byte boundaries.
- virtual unsigned getByValTypeAlignment(const Type *Ty) const;
+ virtual unsigned getByValTypeAlignment(Type *Ty) const;
/// getOptimalMemOpType - Returns the target specific optimal type for load
/// and store operations as a result of memset, memcpy, and memmove
@@ -564,8 +580,8 @@ namespace llvm {
/// DAG node.
virtual const char *getTargetNodeName(unsigned Opcode) const;
- /// getSetCCResultType - Return the ISD::SETCC ValueType
- virtual MVT::SimpleValueType getSetCCResultType(EVT VT) const;
+ /// getSetCCResultType - Return the value type to use for ISD::SETCC.
+ virtual EVT getSetCCResultType(EVT VT) const;
/// computeMaskedBitsForTargetNode - Determine which of the bits specified
/// in Mask are known to be either zero or one and return them in the
@@ -617,12 +633,12 @@ namespace llvm {
/// isLegalAddressingMode - Return true if the addressing mode represented
/// by AM is legal for this target, for a load/store of the specified type.
- virtual bool isLegalAddressingMode(const AddrMode &AM, const Type *Ty)const;
+ virtual bool isLegalAddressingMode(const AddrMode &AM, Type *Ty)const;
/// isTruncateFree - Return true if it's free to truncate a value of
/// type Ty1 to type Ty2. e.g. On x86 it's free to truncate a i32 value in
/// register EAX to i16 by referencing its sub-register AX.
- virtual bool isTruncateFree(const Type *Ty1, const Type *Ty2) const;
+ virtual bool isTruncateFree(Type *Ty1, Type *Ty2) const;
virtual bool isTruncateFree(EVT VT1, EVT VT2) const;
/// isZExtFree - Return true if any actual instruction that defines a
@@ -633,7 +649,7 @@ namespace llvm {
/// does not necessarily apply to truncate instructions. e.g. on x86-64,
/// all instructions that define 32-bit values implicit zero-extend the
/// result out to 64 bits.
- virtual bool isZExtFree(const Type *Ty1, const Type *Ty2) const;
+ virtual bool isZExtFree(Type *Ty1, Type *Ty2) const;
virtual bool isZExtFree(EVT VT1, EVT VT2) const;
/// isNarrowingProfitable - Return true if it's profitable to narrow
@@ -813,11 +829,14 @@ namespace llvm {
SDValue LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerFRAME_TO_ARGS_OFFSET(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerEH_RETURN(SDValue Op, SelectionDAG &DAG) const;
- SDValue LowerTRAMPOLINE(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerINIT_TRAMPOLINE(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerADJUST_TRAMPOLINE(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerFLT_ROUNDS_(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerCTLZ(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerCTTZ(SDValue Op, SelectionDAG &DAG) const;
- SDValue LowerMUL_V2I64(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerADD(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerSUB(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerMUL(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerShift(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerXALUO(SDValue Op, SelectionDAG &DAG) const;
@@ -825,6 +844,7 @@ namespace llvm {
SDValue LowerLOAD_SUB(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerREADCYCLECOUNTER(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerMEMBARRIER(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerATOMIC_FENCE(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerSIGN_EXTEND_INREG(SDValue Op, SelectionDAG &DAG) const;
// Utility functions to help LowerVECTOR_SHUFFLE
@@ -931,6 +951,10 @@ namespace llvm {
MachineBasicBlock *EmitLoweredWinAlloca(MachineInstr *MI,
MachineBasicBlock *BB) const;
+ MachineBasicBlock *EmitLoweredSegAlloca(MachineInstr *MI,
+ MachineBasicBlock *BB,
+ bool Is64Bit) const;
+
MachineBasicBlock *EmitLoweredTLSCall(MachineInstr *MI,
MachineBasicBlock *BB) const;
diff --git a/contrib/llvm/lib/Target/X86/X86InstrArithmetic.td b/contrib/llvm/lib/Target/X86/X86InstrArithmetic.td
index 9f7a4b0..74b647a 100644
--- a/contrib/llvm/lib/Target/X86/X86InstrArithmetic.td
+++ b/contrib/llvm/lib/Target/X86/X86InstrArithmetic.td
@@ -650,6 +650,15 @@ class BinOpRR_Rev<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo>
let isCodeGenOnly = 1;
}
+// BinOpRR_F_Rev - Instructions like "cmp reg, reg" (reversed encoding).
+class BinOpRR_F_Rev<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo>
+ : ITy<opcode, MRMSrcReg, typeinfo, (outs),
+ (ins typeinfo.RegClass:$src1, typeinfo.RegClass:$src2),
+ mnemonic, "{$src2, $src1|$src1, $src2}", []> {
+ // The disassembler should know about this, but not the asmparser.
+ let isCodeGenOnly = 1;
+}
+
// BinOpRM - Instructions like "add reg, reg, [mem]".
class BinOpRM<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo,
dag outlist, list<dag> pattern>
@@ -857,11 +866,10 @@ class BinOpMI8_F<string mnemonic, X86TypeInfo typeinfo,
// BinOpAI - Instructions like "add %eax, %eax, imm".
class BinOpAI<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo,
- Register areg>
+ Register areg, string operands>
: ITy<opcode, RawFrm, typeinfo,
(outs), (ins typeinfo.ImmOperand:$src),
- mnemonic, !strconcat("{$src, %", areg.AsmName, "|%",
- areg.AsmName, ", $src}"), []> {
+ mnemonic, operands, []> {
let ImmT = typeinfo.ImmEncoding;
let Uses = [areg];
let Defs = [areg];
@@ -926,10 +934,14 @@ multiclass ArithBinOp_RF<bits<8> BaseOpc, bits<8> BaseOpc2, bits<8> BaseOpc4,
def #NAME#32mi : BinOpMI_RMW<mnemonic, Xi32, opnode, MemMRM>;
def #NAME#64mi32 : BinOpMI_RMW<mnemonic, Xi64, opnode, MemMRM>;
- def #NAME#8i8 : BinOpAI<BaseOpc4, mnemonic, Xi8 , AL>;
- def #NAME#16i16 : BinOpAI<BaseOpc4, mnemonic, Xi16, AX>;
- def #NAME#32i32 : BinOpAI<BaseOpc4, mnemonic, Xi32, EAX>;
- def #NAME#64i32 : BinOpAI<BaseOpc4, mnemonic, Xi64, RAX>;
+ def #NAME#8i8 : BinOpAI<BaseOpc4, mnemonic, Xi8 , AL,
+ "{$src, %al|AL, $src}">;
+ def #NAME#16i16 : BinOpAI<BaseOpc4, mnemonic, Xi16, AX,
+ "{$src, %ax|AX, $src}">;
+ def #NAME#32i32 : BinOpAI<BaseOpc4, mnemonic, Xi32, EAX,
+ "{$src, %eax|EAX, $src}">;
+ def #NAME#64i32 : BinOpAI<BaseOpc4, mnemonic, Xi64, RAX,
+ "{$src, %rax|RAX, $src}">;
}
}
@@ -993,10 +1005,14 @@ multiclass ArithBinOp_RFF<bits<8> BaseOpc, bits<8> BaseOpc2, bits<8> BaseOpc4,
def #NAME#32mi : BinOpMI_RMW_FF<mnemonic, Xi32, opnode, MemMRM>;
def #NAME#64mi32 : BinOpMI_RMW_FF<mnemonic, Xi64, opnode, MemMRM>;
- def #NAME#8i8 : BinOpAI<BaseOpc4, mnemonic, Xi8 , AL>;
- def #NAME#16i16 : BinOpAI<BaseOpc4, mnemonic, Xi16, AX>;
- def #NAME#32i32 : BinOpAI<BaseOpc4, mnemonic, Xi32, EAX>;
- def #NAME#64i32 : BinOpAI<BaseOpc4, mnemonic, Xi64, RAX>;
+ def #NAME#8i8 : BinOpAI<BaseOpc4, mnemonic, Xi8 , AL,
+ "{$src, %al|AL, $src}">;
+ def #NAME#16i16 : BinOpAI<BaseOpc4, mnemonic, Xi16, AX,
+ "{$src, %ax|AX, $src}">;
+ def #NAME#32i32 : BinOpAI<BaseOpc4, mnemonic, Xi32, EAX,
+ "{$src, %eax|EAX, $src}">;
+ def #NAME#64i32 : BinOpAI<BaseOpc4, mnemonic, Xi64, RAX,
+ "{$src, %rax|RAX, $src}">;
}
}
@@ -1017,10 +1033,10 @@ multiclass ArithBinOp_F<bits<8> BaseOpc, bits<8> BaseOpc2, bits<8> BaseOpc4,
def #NAME#64rr : BinOpRR_F<BaseOpc, mnemonic, Xi64, opnode>;
} // isCommutable
- def #NAME#8rr_REV : BinOpRR_Rev<BaseOpc2, mnemonic, Xi8>;
- def #NAME#16rr_REV : BinOpRR_Rev<BaseOpc2, mnemonic, Xi16>;
- def #NAME#32rr_REV : BinOpRR_Rev<BaseOpc2, mnemonic, Xi32>;
- def #NAME#64rr_REV : BinOpRR_Rev<BaseOpc2, mnemonic, Xi64>;
+ def #NAME#8rr_REV : BinOpRR_F_Rev<BaseOpc2, mnemonic, Xi8>;
+ def #NAME#16rr_REV : BinOpRR_F_Rev<BaseOpc2, mnemonic, Xi16>;
+ def #NAME#32rr_REV : BinOpRR_F_Rev<BaseOpc2, mnemonic, Xi32>;
+ def #NAME#64rr_REV : BinOpRR_F_Rev<BaseOpc2, mnemonic, Xi64>;
def #NAME#8rm : BinOpRM_F<BaseOpc2, mnemonic, Xi8 , opnode>;
def #NAME#16rm : BinOpRM_F<BaseOpc2, mnemonic, Xi16, opnode>;
@@ -1056,10 +1072,14 @@ multiclass ArithBinOp_F<bits<8> BaseOpc, bits<8> BaseOpc2, bits<8> BaseOpc4,
def #NAME#32mi : BinOpMI_F<mnemonic, Xi32, opnode, MemMRM>;
def #NAME#64mi32 : BinOpMI_F<mnemonic, Xi64, opnode, MemMRM>;
- def #NAME#8i8 : BinOpAI<BaseOpc4, mnemonic, Xi8 , AL>;
- def #NAME#16i16 : BinOpAI<BaseOpc4, mnemonic, Xi16, AX>;
- def #NAME#32i32 : BinOpAI<BaseOpc4, mnemonic, Xi32, EAX>;
- def #NAME#64i32 : BinOpAI<BaseOpc4, mnemonic, Xi64, RAX>;
+ def #NAME#8i8 : BinOpAI<BaseOpc4, mnemonic, Xi8 , AL,
+ "{$src, %al|AL, $src}">;
+ def #NAME#16i16 : BinOpAI<BaseOpc4, mnemonic, Xi16, AX,
+ "{$src, %ax|AX, $src}">;
+ def #NAME#32i32 : BinOpAI<BaseOpc4, mnemonic, Xi32, EAX,
+ "{$src, %eax|EAX, $src}">;
+ def #NAME#64i32 : BinOpAI<BaseOpc4, mnemonic, Xi64, RAX,
+ "{$src, %rax|RAX, $src}">;
}
}
@@ -1117,9 +1137,37 @@ let Defs = [EFLAGS] in {
def TEST32mi : BinOpMI_F<"test", Xi32, X86testpat, MRM0m, 0xF6>;
def TEST64mi32 : BinOpMI_F<"test", Xi64, X86testpat, MRM0m, 0xF6>;
- def TEST8i8 : BinOpAI<0xA8, "test", Xi8 , AL>;
- def TEST16i16 : BinOpAI<0xA8, "test", Xi16, AX>;
- def TEST32i32 : BinOpAI<0xA8, "test", Xi32, EAX>;
- def TEST64i32 : BinOpAI<0xA8, "test", Xi64, RAX>;
-}
+ def TEST8i8 : BinOpAI<0xA8, "test", Xi8 , AL,
+ "{$src, %al|AL, $src}">;
+ def TEST16i16 : BinOpAI<0xA8, "test", Xi16, AX,
+ "{$src, %ax|AX, $src}">;
+ def TEST32i32 : BinOpAI<0xA8, "test", Xi32, EAX,
+ "{$src, %eax|EAX, $src}">;
+ def TEST64i32 : BinOpAI<0xA8, "test", Xi64, RAX,
+ "{$src, %rax|RAX, $src}">;
+
+ // When testing the result of EXTRACT_SUBREG sub_8bit_hi, make sure the
+ // register class is constrained to GR8_NOREX.
+ let isPseudo = 1 in
+ def TEST8ri_NOREX : I<0, Pseudo, (outs), (ins GR8_NOREX:$src, i8imm:$mask),
+ "", []>;
+}
+//===----------------------------------------------------------------------===//
+// ANDN Instruction
+//
+multiclass bmi_andn<string mnemonic, RegisterClass RC, X86MemOperand x86memop,
+ PatFrag ld_frag> {
+ def rr : I<0xF2, MRMSrcReg, (outs RC:$dst), (ins RC:$src1, RC:$src2),
+ !strconcat(mnemonic, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
+ [(set RC:$dst, EFLAGS, (X86andn_flag RC:$src1, RC:$src2))]>;
+ def rm : I<0xF2, MRMSrcMem, (outs RC:$dst), (ins RC:$src1, x86memop:$src2),
+ !strconcat(mnemonic, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
+ [(set RC:$dst, EFLAGS,
+ (X86andn_flag RC:$src1, (ld_frag addr:$src2)))]>;
+}
+
+let Predicates = [HasBMI], Defs = [EFLAGS] in {
+ defm ANDN32 : bmi_andn<"andn{l}", GR32, i32mem, loadi32>, T8, VEX_4V;
+ defm ANDN64 : bmi_andn<"andn{q}", GR64, i64mem, loadi64>, T8, VEX_4V, VEX_W;
+}
diff --git a/contrib/llvm/lib/Target/X86/X86InstrCompiler.td b/contrib/llvm/lib/Target/X86/X86InstrCompiler.td
index adcc747..da28690 100644
--- a/contrib/llvm/lib/Target/X86/X86InstrCompiler.td
+++ b/contrib/llvm/lib/Target/X86/X86InstrCompiler.td
@@ -106,6 +106,26 @@ let Defs = [EAX, ESP, EFLAGS], Uses = [ESP] in
def WIN_ALLOCA : I<0, Pseudo, (outs), (ins),
"# dynamic stack allocation",
[(X86WinAlloca)]>;
+
+// When using segmented stacks these are lowered into instructions which first
+// check if the current stacklet has enough free memory. If it does, memory is
+// allocated by bumping the stack pointer. Otherwise memory is allocated from
+// the heap.
+
+let Defs = [EAX, ESP, EFLAGS], Uses = [ESP, EAX] in
+def SEG_ALLOCA_32 : I<0, Pseudo, (outs GR32:$dst), (ins GR32:$size),
+ "# variable sized alloca for segmented stacks",
+ [(set GR32:$dst,
+ (X86SegAlloca GR32:$size))]>,
+ Requires<[In32BitMode]>;
+
+let Defs = [RAX, RSP, EFLAGS], Uses = [RSP, RAX] in
+def SEG_ALLOCA_64 : I<0, Pseudo, (outs GR64:$dst), (ins GR64:$size),
+ "# variable sized alloca for segmented stacks",
+ [(set GR64:$dst,
+ (X86SegAlloca GR64:$size))]>,
+ Requires<[In64BitMode]>;
+
}
@@ -329,18 +349,11 @@ def TLSCall_64 : I<0, Pseudo, (outs), (ins i64mem:$sym),
//===----------------------------------------------------------------------===//
// Conditional Move Pseudo Instructions
-let Constraints = "$src1 = $dst" in {
-
-// Conditional moves
-let Uses = [EFLAGS] in {
-
// X86 doesn't have 8-bit conditional moves. Use a customInserter to
// emit control flow. An alternative to this is to mark i8 SELECT as Promote,
// however that requires promoting the operands, and can induce additional
-// i8 register pressure. Note that CMOV_GR8 is conservatively considered to
-// clobber EFLAGS, because if one of the operands is zero, the expansion
-// could involve an xor.
-let usesCustomInserter = 1, Constraints = "", Defs = [EFLAGS] in {
+// i8 register pressure.
+let usesCustomInserter = 1, Uses = [EFLAGS] in {
def CMOV_GR8 : I<0, Pseudo,
(outs GR8:$dst), (ins GR8:$src1, GR8:$src2, i8imm:$cond),
"#CMOV_GR8 PSEUDO!",
@@ -380,10 +393,7 @@ def CMOV_RFP80 : I<0, Pseudo,
(X86cmov RFP80:$src1, RFP80:$src2, imm:$cond,
EFLAGS))]>;
} // Predicates = [NoCMov]
-} // UsesCustomInserter = 1, Constraints = "", Defs = [EFLAGS]
-} // Uses = [EFLAGS]
-
-} // Constraints = "$src1 = $dst" in
+} // UsesCustomInserter = 1, Uses = [EFLAGS]
//===----------------------------------------------------------------------===//
@@ -532,7 +542,7 @@ def OR32mrLocked : I<0x09, MRMDestMem, (outs), (ins i32mem:$dst, GR32:$zero),
let hasSideEffects = 1 in
def Int_MemBarrier : I<0, Pseudo, (outs), (ins),
"#MEMBARRIER",
- [(X86MemBarrier)]>, Requires<[HasSSE2]>;
+ [(X86MemBarrier)]>;
// TODO: Get this to fold the constant into the instruction.
let hasSideEffects = 1, Defs = [ESP], isCodeGenOnly = 1 in
@@ -630,8 +640,8 @@ def #NAME#64mi8 : RIi8<{ImmOpc8{7}, ImmOpc8{6}, ImmOpc8{5}, ImmOpc8{4},
defm LOCK_ADD : LOCK_ArithBinOp<0x00, 0x80, 0x83, MRM0m, "add">;
defm LOCK_SUB : LOCK_ArithBinOp<0x28, 0x80, 0x83, MRM5m, "sub">;
defm LOCK_OR : LOCK_ArithBinOp<0x08, 0x80, 0x83, MRM1m, "or">;
-defm LOCK_AND : LOCK_ArithBinOp<0x08, 0x80, 0x83, MRM4m, "and">;
-defm LOCK_XOR : LOCK_ArithBinOp<0x08, 0x80, 0x83, MRM6m, "xor">;
+defm LOCK_AND : LOCK_ArithBinOp<0x20, 0x80, 0x83, MRM4m, "and">;
+defm LOCK_XOR : LOCK_ArithBinOp<0x30, 0x80, 0x83, MRM6m, "xor">;
// Optimized codegen when the non-memory output is not used.
let Defs = [EFLAGS], mayLoad = 1, mayStore = 1, isCodeGenOnly = 1 in {
@@ -665,12 +675,20 @@ def LOCK_DEC64m : RI<0xFF, MRM1m, (outs), (ins i64mem:$dst),
// Atomic compare and swap.
let Defs = [EAX, EDX, EFLAGS], Uses = [EAX, EBX, ECX, EDX],
- isCodeGenOnly = 1 in {
+ isCodeGenOnly = 1 in
def LCMPXCHG8B : I<0xC7, MRM1m, (outs), (ins i64mem:$ptr),
"lock\n\t"
"cmpxchg8b\t$ptr",
[(X86cas8 addr:$ptr)]>, TB, LOCK;
-}
+
+let Defs = [RAX, RDX, EFLAGS], Uses = [RAX, RBX, RCX, RDX],
+ isCodeGenOnly = 1 in
+def LCMPXCHG16B : RI<0xC7, MRM1m, (outs), (ins i128mem:$ptr),
+ "lock\n\t"
+ "cmpxchg16b\t$ptr",
+ [(X86cas16 addr:$ptr)]>, TB, LOCK,
+ Requires<[HasCmpxchg16b]>;
+
let Defs = [AL, EFLAGS], Uses = [AL], isCodeGenOnly = 1 in {
def LCMPXCHG8 : I<0xB0, MRMDestMem, (outs), (ins i8mem:$ptr, GR8:$swap),
"lock\n\t"
@@ -695,7 +713,7 @@ def LCMPXCHG32 : I<0xB1, MRMDestMem, (outs), (ins i32mem:$ptr, GR32:$swap),
let Defs = [RAX, EFLAGS], Uses = [RAX], isCodeGenOnly = 1 in {
def LCMPXCHG64 : RI<0xB1, MRMDestMem, (outs), (ins i64mem:$ptr, GR64:$swap),
"lock\n\t"
- "cmpxchgq\t$swap,$ptr",
+ "cmpxchg{q}\t{$swap, $ptr|$ptr, $swap}",
[(X86cas addr:$ptr, GR64:$swap, 8)]>, TB, LOCK;
}
@@ -718,11 +736,37 @@ def LXADD32 : I<0xC1, MRMSrcMem, (outs GR32:$dst), (ins GR32:$val, i32mem:$ptr),
TB, LOCK;
def LXADD64 : RI<0xC1, MRMSrcMem, (outs GR64:$dst), (ins GR64:$val,i64mem:$ptr),
"lock\n\t"
- "xadd\t$val, $ptr",
+ "xadd{q}\t{$val, $ptr|$ptr, $val}",
[(set GR64:$dst, (atomic_load_add_64 addr:$ptr, GR64:$val))]>,
TB, LOCK;
}
+def ACQUIRE_MOV8rm : I<0, Pseudo, (outs GR8 :$dst), (ins i8mem :$src),
+ "#ACQUIRE_MOV PSEUDO!",
+ [(set GR8:$dst, (atomic_load_8 addr:$src))]>;
+def ACQUIRE_MOV16rm : I<0, Pseudo, (outs GR16:$dst), (ins i16mem:$src),
+ "#ACQUIRE_MOV PSEUDO!",
+ [(set GR16:$dst, (atomic_load_16 addr:$src))]>;
+def ACQUIRE_MOV32rm : I<0, Pseudo, (outs GR32:$dst), (ins i32mem:$src),
+ "#ACQUIRE_MOV PSEUDO!",
+ [(set GR32:$dst, (atomic_load_32 addr:$src))]>;
+def ACQUIRE_MOV64rm : I<0, Pseudo, (outs GR64:$dst), (ins i64mem:$src),
+ "#ACQUIRE_MOV PSEUDO!",
+ [(set GR64:$dst, (atomic_load_64 addr:$src))]>;
+
+def RELEASE_MOV8mr : I<0, Pseudo, (outs), (ins i8mem :$dst, GR8 :$src),
+ "#RELEASE_MOV PSEUDO!",
+ [(atomic_store_8 addr:$dst, GR8 :$src)]>;
+def RELEASE_MOV16mr : I<0, Pseudo, (outs), (ins i16mem:$dst, GR16:$src),
+ "#RELEASE_MOV PSEUDO!",
+ [(atomic_store_16 addr:$dst, GR16:$src)]>;
+def RELEASE_MOV32mr : I<0, Pseudo, (outs), (ins i32mem:$dst, GR32:$src),
+ "#RELEASE_MOV PSEUDO!",
+ [(atomic_store_32 addr:$dst, GR32:$src)]>;
+def RELEASE_MOV64mr : I<0, Pseudo, (outs), (ins i64mem:$dst, GR64:$src),
+ "#RELEASE_MOV PSEUDO!",
+ [(atomic_store_64 addr:$dst, GR64:$src)]>;
+
//===----------------------------------------------------------------------===//
// Conditional Move Pseudo Instructions.
//===----------------------------------------------------------------------===//
@@ -759,6 +803,24 @@ let Uses = [EFLAGS], usesCustomInserter = 1 in {
[(set VR128:$dst,
(v2i64 (X86cmov VR128:$t, VR128:$f, imm:$cond,
EFLAGS)))]>;
+ def CMOV_V8F32 : I<0, Pseudo,
+ (outs VR256:$dst), (ins VR256:$t, VR256:$f, i8imm:$cond),
+ "#CMOV_V8F32 PSEUDO!",
+ [(set VR256:$dst,
+ (v8f32 (X86cmov VR256:$t, VR256:$f, imm:$cond,
+ EFLAGS)))]>;
+ def CMOV_V4F64 : I<0, Pseudo,
+ (outs VR256:$dst), (ins VR256:$t, VR256:$f, i8imm:$cond),
+ "#CMOV_V4F64 PSEUDO!",
+ [(set VR256:$dst,
+ (v4f64 (X86cmov VR256:$t, VR256:$f, imm:$cond,
+ EFLAGS)))]>;
+ def CMOV_V4I64 : I<0, Pseudo,
+ (outs VR256:$dst), (ins VR256:$t, VR256:$f, i8imm:$cond),
+ "#CMOV_V4I64 PSEUDO!",
+ [(set VR256:$dst,
+ (v4i64 (X86cmov VR256:$t, VR256:$f, imm:$cond,
+ EFLAGS)))]>;
}
diff --git a/contrib/llvm/lib/Target/X86/X86InstrExtension.td b/contrib/llvm/lib/Target/X86/X86InstrExtension.td
index 2e1d523..e62e6b7 100644
--- a/contrib/llvm/lib/Target/X86/X86InstrExtension.td
+++ b/contrib/llvm/lib/Target/X86/X86InstrExtension.td
@@ -76,12 +76,12 @@ def MOVZX32rm16: I<0xB7, MRMSrcMem, (outs GR32:$dst), (ins i16mem:$src),
// except that they use GR32_NOREX for the output operand register class
// instead of GR32. This allows them to operate on h registers on x86-64.
def MOVZX32_NOREXrr8 : I<0xB6, MRMSrcReg,
- (outs GR32_NOREX:$dst), (ins GR8:$src),
+ (outs GR32_NOREX:$dst), (ins GR8_NOREX:$src),
"movz{bl|x}\t{$src, $dst|$dst, $src}",
[]>, TB;
let mayLoad = 1 in
def MOVZX32_NOREXrm8 : I<0xB6, MRMSrcMem,
- (outs GR32_NOREX:$dst), (ins i8mem:$src),
+ (outs GR32_NOREX:$dst), (ins i8mem_NOREX:$src),
"movz{bl|x}\t{$src, $dst|$dst, $src}",
[]>, TB;
diff --git a/contrib/llvm/lib/Target/X86/X86InstrFormats.td b/contrib/llvm/lib/Target/X86/X86InstrFormats.td
index 6d89bcc..0a1590b 100644
--- a/contrib/llvm/lib/Target/X86/X86InstrFormats.td
+++ b/contrib/llvm/lib/Target/X86/X86InstrFormats.td
@@ -113,6 +113,7 @@ class VEX_W { bit hasVEX_WPrefix = 1; }
class VEX_4V : VEX { bit hasVEX_4VPrefix = 1; }
class VEX_I8IMM { bit hasVEX_i8ImmReg = 1; }
class VEX_L { bit hasVEX_L = 1; }
+class VEX_LIG { bit ignoresVEX_L = 1; }
class Has3DNow0F0FOpcode { bit has3DNow0F0FOpcode = 1; }
class X86Inst<bits<8> opcod, Format f, ImmType i, dag outs, dag ins,
@@ -150,6 +151,7 @@ class X86Inst<bits<8> opcod, Format f, ImmType i, dag outs, dag ins,
bit hasVEX_i8ImmReg = 0; // Does this inst require the last source register
// to be encoded in a immediate field?
bit hasVEX_L = 0; // Does this inst use large (256-bit) registers?
+ bit ignoresVEX_L = 0; // Does this instruction ignore the L-bit
bit has3DNow0F0FOpcode =0;// Wacky 3dNow! encoding?
// TSFlags layout should be kept in sync with X86InstrInfo.h.
@@ -169,7 +171,8 @@ class X86Inst<bits<8> opcod, Format f, ImmType i, dag outs, dag ins,
let TSFlags{35} = hasVEX_4VPrefix;
let TSFlags{36} = hasVEX_i8ImmReg;
let TSFlags{37} = hasVEX_L;
- let TSFlags{38} = has3DNow0F0FOpcode;
+ let TSFlags{38} = ignoresVEX_L;
+ let TSFlags{39} = has3DNow0F0FOpcode;
}
class PseudoI<dag oops, dag iops, list<dag> pattern>
@@ -501,6 +504,9 @@ class RSDI<bits<8> o, Format F, dag outs, dag ins, string asm,
class RPDI<bits<8> o, Format F, dag outs, dag ins, string asm,
list<dag> pattern>
: PDI<o, F, outs, ins, asm, pattern>, REX_W;
+class VRPDI<bits<8> o, Format F, dag outs, dag ins, string asm,
+ list<dag> pattern>
+ : VPDI<o, F, outs, ins, asm, pattern>, VEX_W;
// MMX Instruction templates
//
diff --git a/contrib/llvm/lib/Target/X86/X86InstrFragmentsSIMD.td b/contrib/llvm/lib/Target/X86/X86InstrFragmentsSIMD.td
index b00109c..af919fb 100644
--- a/contrib/llvm/lib/Target/X86/X86InstrFragmentsSIMD.td
+++ b/contrib/llvm/lib/Target/X86/X86InstrFragmentsSIMD.td
@@ -39,6 +39,8 @@ def X86frsqrt : SDNode<"X86ISD::FRSQRT", SDTFPUnaryOp>;
def X86frcp : SDNode<"X86ISD::FRCP", SDTFPUnaryOp>;
def X86fsrl : SDNode<"X86ISD::FSRL", SDTX86FPShiftOp>;
def X86fgetsign: SDNode<"X86ISD::FGETSIGNx86",SDTFPToIntOp>;
+def X86fhadd : SDNode<"X86ISD::FHADD", SDTFPBinOp>;
+def X86fhsub : SDNode<"X86ISD::FHSUB", SDTFPBinOp>;
def X86comi : SDNode<"X86ISD::COMI", SDTX86CmpTest>;
def X86ucomi : SDNode<"X86ISD::UCOMI", SDTX86CmpTest>;
def X86cmpss : SDNode<"X86ISD::FSETCCss", SDTX86Cmpss>;
@@ -49,18 +51,15 @@ def X86pshufb : SDNode<"X86ISD::PSHUFB",
def X86andnp : SDNode<"X86ISD::ANDNP",
SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisSameAs<0,1>,
SDTCisSameAs<0,2>]>>;
-def X86psignb : SDNode<"X86ISD::PSIGNB",
+def X86psignb : SDNode<"X86ISD::PSIGNB",
SDTypeProfile<1, 2, [SDTCisVT<0, v16i8>, SDTCisSameAs<0,1>,
SDTCisSameAs<0,2>]>>;
-def X86psignw : SDNode<"X86ISD::PSIGNW",
+def X86psignw : SDNode<"X86ISD::PSIGNW",
SDTypeProfile<1, 2, [SDTCisVT<0, v8i16>, SDTCisSameAs<0,1>,
SDTCisSameAs<0,2>]>>;
-def X86psignd : SDNode<"X86ISD::PSIGND",
+def X86psignd : SDNode<"X86ISD::PSIGND",
SDTypeProfile<1, 2, [SDTCisVT<0, v4i32>, SDTCisSameAs<0,1>,
SDTCisSameAs<0,2>]>>;
-def X86pblendv : SDNode<"X86ISD::PBLENDVB",
- SDTypeProfile<1, 3, [SDTCisVT<0, v16i8>, SDTCisSameAs<0,1>,
- SDTCisSameAs<0,2>, SDTCisSameAs<0,3>]>>;
def X86pextrb : SDNode<"X86ISD::PEXTRB",
SDTypeProfile<1, 2, [SDTCisVT<0, i32>, SDTCisPtrTy<2>]>>;
def X86pextrw : SDNode<"X86ISD::PEXTRW",
@@ -109,6 +108,8 @@ def SDTShuff2OpI : SDTypeProfile<1, 2, [SDTCisVec<0>,
def SDTShuff3OpI : SDTypeProfile<1, 3, [SDTCisVec<0>, SDTCisSameAs<0,1>,
SDTCisSameAs<0,2>, SDTCisInt<3>]>;
+def SDTVBroadcast : SDTypeProfile<1, 1, [SDTCisVec<0>]>;
+
def X86PAlign : SDNode<"X86ISD::PALIGN", SDTShuff3OpI>;
def X86PShufd : SDNode<"X86ISD::PSHUFD", SDTShuff2OpI>;
@@ -133,12 +134,15 @@ def X86Movhlpd : SDNode<"X86ISD::MOVHLPD", SDTShuff2Op>;
def X86Movlps : SDNode<"X86ISD::MOVLPS", SDTShuff2Op>;
def X86Movlpd : SDNode<"X86ISD::MOVLPD", SDTShuff2Op>;
-def X86Unpcklps : SDNode<"X86ISD::UNPCKLPS", SDTShuff2Op>;
-def X86Unpcklpd : SDNode<"X86ISD::UNPCKLPD", SDTShuff2Op>;
+def X86Unpcklps : SDNode<"X86ISD::UNPCKLPS", SDTShuff2Op>;
+def X86Unpcklpd : SDNode<"X86ISD::UNPCKLPD", SDTShuff2Op>;
def X86Unpcklpsy : SDNode<"X86ISD::VUNPCKLPSY", SDTShuff2Op>;
def X86Unpcklpdy : SDNode<"X86ISD::VUNPCKLPDY", SDTShuff2Op>;
-def X86Unpckhps : SDNode<"X86ISD::UNPCKHPS", SDTShuff2Op>;
-def X86Unpckhpd : SDNode<"X86ISD::UNPCKHPD", SDTShuff2Op>;
+
+def X86Unpckhps : SDNode<"X86ISD::UNPCKHPS", SDTShuff2Op>;
+def X86Unpckhpd : SDNode<"X86ISD::UNPCKHPD", SDTShuff2Op>;
+def X86Unpckhpsy : SDNode<"X86ISD::VUNPCKHPSY", SDTShuff2Op>;
+def X86Unpckhpdy : SDNode<"X86ISD::VUNPCKHPDY", SDTShuff2Op>;
def X86Punpcklbw : SDNode<"X86ISD::PUNPCKLBW", SDTShuff2Op>;
def X86Punpcklwd : SDNode<"X86ISD::PUNPCKLWD", SDTShuff2Op>;
@@ -150,6 +154,15 @@ def X86Punpckhwd : SDNode<"X86ISD::PUNPCKHWD", SDTShuff2Op>;
def X86Punpckhdq : SDNode<"X86ISD::PUNPCKHDQ", SDTShuff2Op>;
def X86Punpckhqdq : SDNode<"X86ISD::PUNPCKHQDQ", SDTShuff2Op>;
+def X86VPermilps : SDNode<"X86ISD::VPERMILPS", SDTShuff2OpI>;
+def X86VPermilpsy : SDNode<"X86ISD::VPERMILPSY", SDTShuff2OpI>;
+def X86VPermilpd : SDNode<"X86ISD::VPERMILPD", SDTShuff2OpI>;
+def X86VPermilpdy : SDNode<"X86ISD::VPERMILPDY", SDTShuff2OpI>;
+
+def X86VPerm2f128 : SDNode<"X86ISD::VPERM2F128", SDTShuff3OpI>;
+
+def X86VBroadcast : SDNode<"X86ISD::VBROADCAST", SDTVBroadcast>;
+
//===----------------------------------------------------------------------===//
// SSE Complex Patterns
//===----------------------------------------------------------------------===//
@@ -193,17 +206,28 @@ def loadv4f64 : PatFrag<(ops node:$ptr), (v4f64 (load node:$ptr))>;
def loadv8i32 : PatFrag<(ops node:$ptr), (v8i32 (load node:$ptr))>;
def loadv4i64 : PatFrag<(ops node:$ptr), (v4i64 (load node:$ptr))>;
-// Like 'store', but always requires vector alignment.
+// Like 'store', but always requires 128-bit vector alignment.
def alignedstore : PatFrag<(ops node:$val, node:$ptr),
(store node:$val, node:$ptr), [{
return cast<StoreSDNode>(N)->getAlignment() >= 16;
}]>;
-// Like 'load', but always requires vector alignment.
+// Like 'store', but always requires 256-bit vector alignment.
+def alignedstore256 : PatFrag<(ops node:$val, node:$ptr),
+ (store node:$val, node:$ptr), [{
+ return cast<StoreSDNode>(N)->getAlignment() >= 32;
+}]>;
+
+// Like 'load', but always requires 128-bit vector alignment.
def alignedload : PatFrag<(ops node:$ptr), (load node:$ptr), [{
return cast<LoadSDNode>(N)->getAlignment() >= 16;
}]>;
+// Like 'load', but always requires 256-bit vector alignment.
+def alignedload256 : PatFrag<(ops node:$ptr), (load node:$ptr), [{
+ return cast<LoadSDNode>(N)->getAlignment() >= 32;
+}]>;
+
def alignedloadfsf32 : PatFrag<(ops node:$ptr),
(f32 (alignedload node:$ptr))>;
def alignedloadfsf64 : PatFrag<(ops node:$ptr),
@@ -221,13 +245,13 @@ def alignedloadv2i64 : PatFrag<(ops node:$ptr),
// 256-bit aligned load pattern fragments
def alignedloadv8f32 : PatFrag<(ops node:$ptr),
- (v8f32 (alignedload node:$ptr))>;
+ (v8f32 (alignedload256 node:$ptr))>;
def alignedloadv4f64 : PatFrag<(ops node:$ptr),
- (v4f64 (alignedload node:$ptr))>;
+ (v4f64 (alignedload256 node:$ptr))>;
def alignedloadv8i32 : PatFrag<(ops node:$ptr),
- (v8i32 (alignedload node:$ptr))>;
+ (v8i32 (alignedload256 node:$ptr))>;
def alignedloadv4i64 : PatFrag<(ops node:$ptr),
- (v4i64 (alignedload node:$ptr))>;
+ (v4i64 (alignedload256 node:$ptr))>;
// Like 'load', but uses special alignment checks suitable for use in
// memory operands in most SSE instructions, which are required to
@@ -356,7 +380,7 @@ def EXTRACT_get_vextractf128_imm : SDNodeXForm<extract_subvector, [{
return getI8Imm(X86::getExtractVEXTRACTF128Immediate(N));
}]>;
-// INSERT_get_vinsertf128_imm xform function: convert insert_subvector index to
+// INSERT_get_vinsertf128_imm xform function: convert insert_subvector index to
// VINSERTF128 imm.
def INSERT_get_vinsertf128_imm : SDNodeXForm<insert_subvector, [{
return getI8Imm(X86::getInsertVINSERTF128Immediate(N));
@@ -398,16 +422,6 @@ def movl : PatFrag<(ops node:$lhs, node:$rhs),
return X86::isMOVLMask(cast<ShuffleVectorSDNode>(N));
}]>;
-def movshdup : PatFrag<(ops node:$lhs, node:$rhs),
- (vector_shuffle node:$lhs, node:$rhs), [{
- return X86::isMOVSHDUPMask(cast<ShuffleVectorSDNode>(N));
-}]>;
-
-def movsldup : PatFrag<(ops node:$lhs, node:$rhs),
- (vector_shuffle node:$lhs, node:$rhs), [{
- return X86::isMOVSLDUPMask(cast<ShuffleVectorSDNode>(N));
-}]>;
-
def unpckl : PatFrag<(ops node:$lhs, node:$rhs),
(vector_shuffle node:$lhs, node:$rhs), [{
return X86::isUNPCKLMask(cast<ShuffleVectorSDNode>(N));
@@ -418,16 +432,6 @@ def unpckh : PatFrag<(ops node:$lhs, node:$rhs),
return X86::isUNPCKHMask(cast<ShuffleVectorSDNode>(N));
}]>;
-def unpckl_undef : PatFrag<(ops node:$lhs, node:$rhs),
- (vector_shuffle node:$lhs, node:$rhs), [{
- return X86::isUNPCKL_v_undef_Mask(cast<ShuffleVectorSDNode>(N));
-}]>;
-
-def unpckh_undef : PatFrag<(ops node:$lhs, node:$rhs),
- (vector_shuffle node:$lhs, node:$rhs), [{
- return X86::isUNPCKH_v_undef_Mask(cast<ShuffleVectorSDNode>(N));
-}]>;
-
def pshufd : PatFrag<(ops node:$lhs, node:$rhs),
(vector_shuffle node:$lhs, node:$rhs), [{
return X86::isPSHUFDMask(cast<ShuffleVectorSDNode>(N));
@@ -448,11 +452,6 @@ def pshuflw : PatFrag<(ops node:$lhs, node:$rhs),
return X86::isPSHUFLWMask(cast<ShuffleVectorSDNode>(N));
}], SHUFFLE_get_pshuflw_imm>;
-def palign : PatFrag<(ops node:$lhs, node:$rhs),
- (vector_shuffle node:$lhs, node:$rhs), [{
- return X86::isPALIGNRMask(cast<ShuffleVectorSDNode>(N));
-}], SHUFFLE_get_palign_imm>;
-
def vextractf128_extract : PatFrag<(ops node:$bigvec, node:$index),
(extract_subvector node:$bigvec,
node:$index), [{
@@ -465,3 +464,4 @@ def vinsertf128_insert : PatFrag<(ops node:$bigvec, node:$smallvec,
node:$index), [{
return X86::isVINSERTF128Index(N);
}], INSERT_get_vinsertf128_imm>;
+
diff --git a/contrib/llvm/lib/Target/X86/X86InstrInfo.cpp b/contrib/llvm/lib/Target/X86/X86InstrInfo.cpp
index 55b5835..3a02de0 100644
--- a/contrib/llvm/lib/Target/X86/X86InstrInfo.cpp
+++ b/contrib/llvm/lib/Target/X86/X86InstrInfo.cpp
@@ -53,6 +53,36 @@ ReMatPICStubLoad("remat-pic-stub-load",
cl::desc("Re-materialize load from stub in PIC mode"),
cl::init(false), cl::Hidden);
+enum {
+ // Select which memory operand is being unfolded.
+ // (stored in bits 0 - 7)
+ TB_INDEX_0 = 0,
+ TB_INDEX_1 = 1,
+ TB_INDEX_2 = 2,
+ TB_INDEX_MASK = 0xff,
+
+ // Minimum alignment required for load/store.
+ // Used for RegOp->MemOp conversion.
+ // (stored in bits 8 - 15)
+ TB_ALIGN_SHIFT = 8,
+ TB_ALIGN_NONE = 0 << TB_ALIGN_SHIFT,
+ TB_ALIGN_16 = 16 << TB_ALIGN_SHIFT,
+ TB_ALIGN_32 = 32 << TB_ALIGN_SHIFT,
+ TB_ALIGN_MASK = 0xff << TB_ALIGN_SHIFT,
+
+ // Do not insert the reverse map (MemOp -> RegOp) into the table.
+ // This may be needed because there is a many -> one mapping.
+ TB_NO_REVERSE = 1 << 16,
+
+ // Do not insert the forward map (RegOp -> MemOp) into the table.
+ // This is needed for Native Client, which prohibits branch
+ // instructions from using a memory operand.
+ TB_NO_FORWARD = 1 << 17,
+
+ TB_FOLDED_LOAD = 1 << 18,
+ TB_FOLDED_STORE = 1 << 19
+};
+
X86InstrInfo::X86InstrInfo(X86TargetMachine &tm)
: X86GenInstrInfo((tm.getSubtarget<X86Subtarget>().is64Bit()
? X86::ADJCALLSTACKDOWN64
@@ -61,655 +91,829 @@ X86InstrInfo::X86InstrInfo(X86TargetMachine &tm)
? X86::ADJCALLSTACKUP64
: X86::ADJCALLSTACKUP32)),
TM(tm), RI(tm, *this) {
- enum {
- TB_NOT_REVERSABLE = 1U << 31,
- TB_FLAGS = TB_NOT_REVERSABLE
- };
- static const unsigned OpTbl2Addr[][2] = {
- { X86::ADC32ri, X86::ADC32mi },
- { X86::ADC32ri8, X86::ADC32mi8 },
- { X86::ADC32rr, X86::ADC32mr },
- { X86::ADC64ri32, X86::ADC64mi32 },
- { X86::ADC64ri8, X86::ADC64mi8 },
- { X86::ADC64rr, X86::ADC64mr },
- { X86::ADD16ri, X86::ADD16mi },
- { X86::ADD16ri8, X86::ADD16mi8 },
- { X86::ADD16ri_DB, X86::ADD16mi | TB_NOT_REVERSABLE },
- { X86::ADD16ri8_DB, X86::ADD16mi8 | TB_NOT_REVERSABLE },
- { X86::ADD16rr, X86::ADD16mr },
- { X86::ADD16rr_DB, X86::ADD16mr | TB_NOT_REVERSABLE },
- { X86::ADD32ri, X86::ADD32mi },
- { X86::ADD32ri8, X86::ADD32mi8 },
- { X86::ADD32ri_DB, X86::ADD32mi | TB_NOT_REVERSABLE },
- { X86::ADD32ri8_DB, X86::ADD32mi8 | TB_NOT_REVERSABLE },
- { X86::ADD32rr, X86::ADD32mr },
- { X86::ADD32rr_DB, X86::ADD32mr | TB_NOT_REVERSABLE },
- { X86::ADD64ri32, X86::ADD64mi32 },
- { X86::ADD64ri8, X86::ADD64mi8 },
- { X86::ADD64ri32_DB,X86::ADD64mi32 | TB_NOT_REVERSABLE },
- { X86::ADD64ri8_DB, X86::ADD64mi8 | TB_NOT_REVERSABLE },
- { X86::ADD64rr, X86::ADD64mr },
- { X86::ADD64rr_DB, X86::ADD64mr | TB_NOT_REVERSABLE },
- { X86::ADD8ri, X86::ADD8mi },
- { X86::ADD8rr, X86::ADD8mr },
- { X86::AND16ri, X86::AND16mi },
- { X86::AND16ri8, X86::AND16mi8 },
- { X86::AND16rr, X86::AND16mr },
- { X86::AND32ri, X86::AND32mi },
- { X86::AND32ri8, X86::AND32mi8 },
- { X86::AND32rr, X86::AND32mr },
- { X86::AND64ri32, X86::AND64mi32 },
- { X86::AND64ri8, X86::AND64mi8 },
- { X86::AND64rr, X86::AND64mr },
- { X86::AND8ri, X86::AND8mi },
- { X86::AND8rr, X86::AND8mr },
- { X86::DEC16r, X86::DEC16m },
- { X86::DEC32r, X86::DEC32m },
- { X86::DEC64_16r, X86::DEC64_16m },
- { X86::DEC64_32r, X86::DEC64_32m },
- { X86::DEC64r, X86::DEC64m },
- { X86::DEC8r, X86::DEC8m },
- { X86::INC16r, X86::INC16m },
- { X86::INC32r, X86::INC32m },
- { X86::INC64_16r, X86::INC64_16m },
- { X86::INC64_32r, X86::INC64_32m },
- { X86::INC64r, X86::INC64m },
- { X86::INC8r, X86::INC8m },
- { X86::NEG16r, X86::NEG16m },
- { X86::NEG32r, X86::NEG32m },
- { X86::NEG64r, X86::NEG64m },
- { X86::NEG8r, X86::NEG8m },
- { X86::NOT16r, X86::NOT16m },
- { X86::NOT32r, X86::NOT32m },
- { X86::NOT64r, X86::NOT64m },
- { X86::NOT8r, X86::NOT8m },
- { X86::OR16ri, X86::OR16mi },
- { X86::OR16ri8, X86::OR16mi8 },
- { X86::OR16rr, X86::OR16mr },
- { X86::OR32ri, X86::OR32mi },
- { X86::OR32ri8, X86::OR32mi8 },
- { X86::OR32rr, X86::OR32mr },
- { X86::OR64ri32, X86::OR64mi32 },
- { X86::OR64ri8, X86::OR64mi8 },
- { X86::OR64rr, X86::OR64mr },
- { X86::OR8ri, X86::OR8mi },
- { X86::OR8rr, X86::OR8mr },
- { X86::ROL16r1, X86::ROL16m1 },
- { X86::ROL16rCL, X86::ROL16mCL },
- { X86::ROL16ri, X86::ROL16mi },
- { X86::ROL32r1, X86::ROL32m1 },
- { X86::ROL32rCL, X86::ROL32mCL },
- { X86::ROL32ri, X86::ROL32mi },
- { X86::ROL64r1, X86::ROL64m1 },
- { X86::ROL64rCL, X86::ROL64mCL },
- { X86::ROL64ri, X86::ROL64mi },
- { X86::ROL8r1, X86::ROL8m1 },
- { X86::ROL8rCL, X86::ROL8mCL },
- { X86::ROL8ri, X86::ROL8mi },
- { X86::ROR16r1, X86::ROR16m1 },
- { X86::ROR16rCL, X86::ROR16mCL },
- { X86::ROR16ri, X86::ROR16mi },
- { X86::ROR32r1, X86::ROR32m1 },
- { X86::ROR32rCL, X86::ROR32mCL },
- { X86::ROR32ri, X86::ROR32mi },
- { X86::ROR64r1, X86::ROR64m1 },
- { X86::ROR64rCL, X86::ROR64mCL },
- { X86::ROR64ri, X86::ROR64mi },
- { X86::ROR8r1, X86::ROR8m1 },
- { X86::ROR8rCL, X86::ROR8mCL },
- { X86::ROR8ri, X86::ROR8mi },
- { X86::SAR16r1, X86::SAR16m1 },
- { X86::SAR16rCL, X86::SAR16mCL },
- { X86::SAR16ri, X86::SAR16mi },
- { X86::SAR32r1, X86::SAR32m1 },
- { X86::SAR32rCL, X86::SAR32mCL },
- { X86::SAR32ri, X86::SAR32mi },
- { X86::SAR64r1, X86::SAR64m1 },
- { X86::SAR64rCL, X86::SAR64mCL },
- { X86::SAR64ri, X86::SAR64mi },
- { X86::SAR8r1, X86::SAR8m1 },
- { X86::SAR8rCL, X86::SAR8mCL },
- { X86::SAR8ri, X86::SAR8mi },
- { X86::SBB32ri, X86::SBB32mi },
- { X86::SBB32ri8, X86::SBB32mi8 },
- { X86::SBB32rr, X86::SBB32mr },
- { X86::SBB64ri32, X86::SBB64mi32 },
- { X86::SBB64ri8, X86::SBB64mi8 },
- { X86::SBB64rr, X86::SBB64mr },
- { X86::SHL16rCL, X86::SHL16mCL },
- { X86::SHL16ri, X86::SHL16mi },
- { X86::SHL32rCL, X86::SHL32mCL },
- { X86::SHL32ri, X86::SHL32mi },
- { X86::SHL64rCL, X86::SHL64mCL },
- { X86::SHL64ri, X86::SHL64mi },
- { X86::SHL8rCL, X86::SHL8mCL },
- { X86::SHL8ri, X86::SHL8mi },
- { X86::SHLD16rrCL, X86::SHLD16mrCL },
- { X86::SHLD16rri8, X86::SHLD16mri8 },
- { X86::SHLD32rrCL, X86::SHLD32mrCL },
- { X86::SHLD32rri8, X86::SHLD32mri8 },
- { X86::SHLD64rrCL, X86::SHLD64mrCL },
- { X86::SHLD64rri8, X86::SHLD64mri8 },
- { X86::SHR16r1, X86::SHR16m1 },
- { X86::SHR16rCL, X86::SHR16mCL },
- { X86::SHR16ri, X86::SHR16mi },
- { X86::SHR32r1, X86::SHR32m1 },
- { X86::SHR32rCL, X86::SHR32mCL },
- { X86::SHR32ri, X86::SHR32mi },
- { X86::SHR64r1, X86::SHR64m1 },
- { X86::SHR64rCL, X86::SHR64mCL },
- { X86::SHR64ri, X86::SHR64mi },
- { X86::SHR8r1, X86::SHR8m1 },
- { X86::SHR8rCL, X86::SHR8mCL },
- { X86::SHR8ri, X86::SHR8mi },
- { X86::SHRD16rrCL, X86::SHRD16mrCL },
- { X86::SHRD16rri8, X86::SHRD16mri8 },
- { X86::SHRD32rrCL, X86::SHRD32mrCL },
- { X86::SHRD32rri8, X86::SHRD32mri8 },
- { X86::SHRD64rrCL, X86::SHRD64mrCL },
- { X86::SHRD64rri8, X86::SHRD64mri8 },
- { X86::SUB16ri, X86::SUB16mi },
- { X86::SUB16ri8, X86::SUB16mi8 },
- { X86::SUB16rr, X86::SUB16mr },
- { X86::SUB32ri, X86::SUB32mi },
- { X86::SUB32ri8, X86::SUB32mi8 },
- { X86::SUB32rr, X86::SUB32mr },
- { X86::SUB64ri32, X86::SUB64mi32 },
- { X86::SUB64ri8, X86::SUB64mi8 },
- { X86::SUB64rr, X86::SUB64mr },
- { X86::SUB8ri, X86::SUB8mi },
- { X86::SUB8rr, X86::SUB8mr },
- { X86::XOR16ri, X86::XOR16mi },
- { X86::XOR16ri8, X86::XOR16mi8 },
- { X86::XOR16rr, X86::XOR16mr },
- { X86::XOR32ri, X86::XOR32mi },
- { X86::XOR32ri8, X86::XOR32mi8 },
- { X86::XOR32rr, X86::XOR32mr },
- { X86::XOR64ri32, X86::XOR64mi32 },
- { X86::XOR64ri8, X86::XOR64mi8 },
- { X86::XOR64rr, X86::XOR64mr },
- { X86::XOR8ri, X86::XOR8mi },
- { X86::XOR8rr, X86::XOR8mr }
+ static const unsigned OpTbl2Addr[][3] = {
+ { X86::ADC32ri, X86::ADC32mi, 0 },
+ { X86::ADC32ri8, X86::ADC32mi8, 0 },
+ { X86::ADC32rr, X86::ADC32mr, 0 },
+ { X86::ADC64ri32, X86::ADC64mi32, 0 },
+ { X86::ADC64ri8, X86::ADC64mi8, 0 },
+ { X86::ADC64rr, X86::ADC64mr, 0 },
+ { X86::ADD16ri, X86::ADD16mi, 0 },
+ { X86::ADD16ri8, X86::ADD16mi8, 0 },
+ { X86::ADD16ri_DB, X86::ADD16mi, TB_NO_REVERSE },
+ { X86::ADD16ri8_DB, X86::ADD16mi8, TB_NO_REVERSE },
+ { X86::ADD16rr, X86::ADD16mr, 0 },
+ { X86::ADD16rr_DB, X86::ADD16mr, TB_NO_REVERSE },
+ { X86::ADD32ri, X86::ADD32mi, 0 },
+ { X86::ADD32ri8, X86::ADD32mi8, 0 },
+ { X86::ADD32ri_DB, X86::ADD32mi, TB_NO_REVERSE },
+ { X86::ADD32ri8_DB, X86::ADD32mi8, TB_NO_REVERSE },
+ { X86::ADD32rr, X86::ADD32mr, 0 },
+ { X86::ADD32rr_DB, X86::ADD32mr, TB_NO_REVERSE },
+ { X86::ADD64ri32, X86::ADD64mi32, 0 },
+ { X86::ADD64ri8, X86::ADD64mi8, 0 },
+ { X86::ADD64ri32_DB,X86::ADD64mi32, TB_NO_REVERSE },
+ { X86::ADD64ri8_DB, X86::ADD64mi8, TB_NO_REVERSE },
+ { X86::ADD64rr, X86::ADD64mr, 0 },
+ { X86::ADD64rr_DB, X86::ADD64mr, TB_NO_REVERSE },
+ { X86::ADD8ri, X86::ADD8mi, 0 },
+ { X86::ADD8rr, X86::ADD8mr, 0 },
+ { X86::AND16ri, X86::AND16mi, 0 },
+ { X86::AND16ri8, X86::AND16mi8, 0 },
+ { X86::AND16rr, X86::AND16mr, 0 },
+ { X86::AND32ri, X86::AND32mi, 0 },
+ { X86::AND32ri8, X86::AND32mi8, 0 },
+ { X86::AND32rr, X86::AND32mr, 0 },
+ { X86::AND64ri32, X86::AND64mi32, 0 },
+ { X86::AND64ri8, X86::AND64mi8, 0 },
+ { X86::AND64rr, X86::AND64mr, 0 },
+ { X86::AND8ri, X86::AND8mi, 0 },
+ { X86::AND8rr, X86::AND8mr, 0 },
+ { X86::DEC16r, X86::DEC16m, 0 },
+ { X86::DEC32r, X86::DEC32m, 0 },
+ { X86::DEC64_16r, X86::DEC64_16m, 0 },
+ { X86::DEC64_32r, X86::DEC64_32m, 0 },
+ { X86::DEC64r, X86::DEC64m, 0 },
+ { X86::DEC8r, X86::DEC8m, 0 },
+ { X86::INC16r, X86::INC16m, 0 },
+ { X86::INC32r, X86::INC32m, 0 },
+ { X86::INC64_16r, X86::INC64_16m, 0 },
+ { X86::INC64_32r, X86::INC64_32m, 0 },
+ { X86::INC64r, X86::INC64m, 0 },
+ { X86::INC8r, X86::INC8m, 0 },
+ { X86::NEG16r, X86::NEG16m, 0 },
+ { X86::NEG32r, X86::NEG32m, 0 },
+ { X86::NEG64r, X86::NEG64m, 0 },
+ { X86::NEG8r, X86::NEG8m, 0 },
+ { X86::NOT16r, X86::NOT16m, 0 },
+ { X86::NOT32r, X86::NOT32m, 0 },
+ { X86::NOT64r, X86::NOT64m, 0 },
+ { X86::NOT8r, X86::NOT8m, 0 },
+ { X86::OR16ri, X86::OR16mi, 0 },
+ { X86::OR16ri8, X86::OR16mi8, 0 },
+ { X86::OR16rr, X86::OR16mr, 0 },
+ { X86::OR32ri, X86::OR32mi, 0 },
+ { X86::OR32ri8, X86::OR32mi8, 0 },
+ { X86::OR32rr, X86::OR32mr, 0 },
+ { X86::OR64ri32, X86::OR64mi32, 0 },
+ { X86::OR64ri8, X86::OR64mi8, 0 },
+ { X86::OR64rr, X86::OR64mr, 0 },
+ { X86::OR8ri, X86::OR8mi, 0 },
+ { X86::OR8rr, X86::OR8mr, 0 },
+ { X86::ROL16r1, X86::ROL16m1, 0 },
+ { X86::ROL16rCL, X86::ROL16mCL, 0 },
+ { X86::ROL16ri, X86::ROL16mi, 0 },
+ { X86::ROL32r1, X86::ROL32m1, 0 },
+ { X86::ROL32rCL, X86::ROL32mCL, 0 },
+ { X86::ROL32ri, X86::ROL32mi, 0 },
+ { X86::ROL64r1, X86::ROL64m1, 0 },
+ { X86::ROL64rCL, X86::ROL64mCL, 0 },
+ { X86::ROL64ri, X86::ROL64mi, 0 },
+ { X86::ROL8r1, X86::ROL8m1, 0 },
+ { X86::ROL8rCL, X86::ROL8mCL, 0 },
+ { X86::ROL8ri, X86::ROL8mi, 0 },
+ { X86::ROR16r1, X86::ROR16m1, 0 },
+ { X86::ROR16rCL, X86::ROR16mCL, 0 },
+ { X86::ROR16ri, X86::ROR16mi, 0 },
+ { X86::ROR32r1, X86::ROR32m1, 0 },
+ { X86::ROR32rCL, X86::ROR32mCL, 0 },
+ { X86::ROR32ri, X86::ROR32mi, 0 },
+ { X86::ROR64r1, X86::ROR64m1, 0 },
+ { X86::ROR64rCL, X86::ROR64mCL, 0 },
+ { X86::ROR64ri, X86::ROR64mi, 0 },
+ { X86::ROR8r1, X86::ROR8m1, 0 },
+ { X86::ROR8rCL, X86::ROR8mCL, 0 },
+ { X86::ROR8ri, X86::ROR8mi, 0 },
+ { X86::SAR16r1, X86::SAR16m1, 0 },
+ { X86::SAR16rCL, X86::SAR16mCL, 0 },
+ { X86::SAR16ri, X86::SAR16mi, 0 },
+ { X86::SAR32r1, X86::SAR32m1, 0 },
+ { X86::SAR32rCL, X86::SAR32mCL, 0 },
+ { X86::SAR32ri, X86::SAR32mi, 0 },
+ { X86::SAR64r1, X86::SAR64m1, 0 },
+ { X86::SAR64rCL, X86::SAR64mCL, 0 },
+ { X86::SAR64ri, X86::SAR64mi, 0 },
+ { X86::SAR8r1, X86::SAR8m1, 0 },
+ { X86::SAR8rCL, X86::SAR8mCL, 0 },
+ { X86::SAR8ri, X86::SAR8mi, 0 },
+ { X86::SBB32ri, X86::SBB32mi, 0 },
+ { X86::SBB32ri8, X86::SBB32mi8, 0 },
+ { X86::SBB32rr, X86::SBB32mr, 0 },
+ { X86::SBB64ri32, X86::SBB64mi32, 0 },
+ { X86::SBB64ri8, X86::SBB64mi8, 0 },
+ { X86::SBB64rr, X86::SBB64mr, 0 },
+ { X86::SHL16rCL, X86::SHL16mCL, 0 },
+ { X86::SHL16ri, X86::SHL16mi, 0 },
+ { X86::SHL32rCL, X86::SHL32mCL, 0 },
+ { X86::SHL32ri, X86::SHL32mi, 0 },
+ { X86::SHL64rCL, X86::SHL64mCL, 0 },
+ { X86::SHL64ri, X86::SHL64mi, 0 },
+ { X86::SHL8rCL, X86::SHL8mCL, 0 },
+ { X86::SHL8ri, X86::SHL8mi, 0 },
+ { X86::SHLD16rrCL, X86::SHLD16mrCL, 0 },
+ { X86::SHLD16rri8, X86::SHLD16mri8, 0 },
+ { X86::SHLD32rrCL, X86::SHLD32mrCL, 0 },
+ { X86::SHLD32rri8, X86::SHLD32mri8, 0 },
+ { X86::SHLD64rrCL, X86::SHLD64mrCL, 0 },
+ { X86::SHLD64rri8, X86::SHLD64mri8, 0 },
+ { X86::SHR16r1, X86::SHR16m1, 0 },
+ { X86::SHR16rCL, X86::SHR16mCL, 0 },
+ { X86::SHR16ri, X86::SHR16mi, 0 },
+ { X86::SHR32r1, X86::SHR32m1, 0 },
+ { X86::SHR32rCL, X86::SHR32mCL, 0 },
+ { X86::SHR32ri, X86::SHR32mi, 0 },
+ { X86::SHR64r1, X86::SHR64m1, 0 },
+ { X86::SHR64rCL, X86::SHR64mCL, 0 },
+ { X86::SHR64ri, X86::SHR64mi, 0 },
+ { X86::SHR8r1, X86::SHR8m1, 0 },
+ { X86::SHR8rCL, X86::SHR8mCL, 0 },
+ { X86::SHR8ri, X86::SHR8mi, 0 },
+ { X86::SHRD16rrCL, X86::SHRD16mrCL, 0 },
+ { X86::SHRD16rri8, X86::SHRD16mri8, 0 },
+ { X86::SHRD32rrCL, X86::SHRD32mrCL, 0 },
+ { X86::SHRD32rri8, X86::SHRD32mri8, 0 },
+ { X86::SHRD64rrCL, X86::SHRD64mrCL, 0 },
+ { X86::SHRD64rri8, X86::SHRD64mri8, 0 },
+ { X86::SUB16ri, X86::SUB16mi, 0 },
+ { X86::SUB16ri8, X86::SUB16mi8, 0 },
+ { X86::SUB16rr, X86::SUB16mr, 0 },
+ { X86::SUB32ri, X86::SUB32mi, 0 },
+ { X86::SUB32ri8, X86::SUB32mi8, 0 },
+ { X86::SUB32rr, X86::SUB32mr, 0 },
+ { X86::SUB64ri32, X86::SUB64mi32, 0 },
+ { X86::SUB64ri8, X86::SUB64mi8, 0 },
+ { X86::SUB64rr, X86::SUB64mr, 0 },
+ { X86::SUB8ri, X86::SUB8mi, 0 },
+ { X86::SUB8rr, X86::SUB8mr, 0 },
+ { X86::XOR16ri, X86::XOR16mi, 0 },
+ { X86::XOR16ri8, X86::XOR16mi8, 0 },
+ { X86::XOR16rr, X86::XOR16mr, 0 },
+ { X86::XOR32ri, X86::XOR32mi, 0 },
+ { X86::XOR32ri8, X86::XOR32mi8, 0 },
+ { X86::XOR32rr, X86::XOR32mr, 0 },
+ { X86::XOR64ri32, X86::XOR64mi32, 0 },
+ { X86::XOR64ri8, X86::XOR64mi8, 0 },
+ { X86::XOR64rr, X86::XOR64mr, 0 },
+ { X86::XOR8ri, X86::XOR8mi, 0 },
+ { X86::XOR8rr, X86::XOR8mr, 0 }
};
for (unsigned i = 0, e = array_lengthof(OpTbl2Addr); i != e; ++i) {
unsigned RegOp = OpTbl2Addr[i][0];
- unsigned MemOp = OpTbl2Addr[i][1] & ~TB_FLAGS;
- assert(!RegOp2MemOpTable2Addr.count(RegOp) && "Duplicated entries?");
- RegOp2MemOpTable2Addr[RegOp] = std::make_pair(MemOp, 0U);
-
- // If this is not a reversible operation (because there is a many->one)
- // mapping, don't insert the reverse of the operation into MemOp2RegOpTable.
- if (OpTbl2Addr[i][1] & TB_NOT_REVERSABLE)
- continue;
-
- // Index 0, folded load and store, no alignment requirement.
- unsigned AuxInfo = 0 | (1 << 4) | (1 << 5);
-
- assert(!MemOp2RegOpTable.count(MemOp) &&
- "Duplicated entries in unfolding maps?");
- MemOp2RegOpTable[MemOp] = std::make_pair(RegOp, AuxInfo);
- }
-
- // If the third value is 1, then it's folding either a load or a store.
- static const unsigned OpTbl0[][4] = {
- { X86::BT16ri8, X86::BT16mi8, 1, 0 },
- { X86::BT32ri8, X86::BT32mi8, 1, 0 },
- { X86::BT64ri8, X86::BT64mi8, 1, 0 },
- { X86::CALL32r, X86::CALL32m, 1, 0 },
- { X86::CALL64r, X86::CALL64m, 1, 0 },
- { X86::WINCALL64r, X86::WINCALL64m, 1, 0 },
- { X86::CMP16ri, X86::CMP16mi, 1, 0 },
- { X86::CMP16ri8, X86::CMP16mi8, 1, 0 },
- { X86::CMP16rr, X86::CMP16mr, 1, 0 },
- { X86::CMP32ri, X86::CMP32mi, 1, 0 },
- { X86::CMP32ri8, X86::CMP32mi8, 1, 0 },
- { X86::CMP32rr, X86::CMP32mr, 1, 0 },
- { X86::CMP64ri32, X86::CMP64mi32, 1, 0 },
- { X86::CMP64ri8, X86::CMP64mi8, 1, 0 },
- { X86::CMP64rr, X86::CMP64mr, 1, 0 },
- { X86::CMP8ri, X86::CMP8mi, 1, 0 },
- { X86::CMP8rr, X86::CMP8mr, 1, 0 },
- { X86::DIV16r, X86::DIV16m, 1, 0 },
- { X86::DIV32r, X86::DIV32m, 1, 0 },
- { X86::DIV64r, X86::DIV64m, 1, 0 },
- { X86::DIV8r, X86::DIV8m, 1, 0 },
- { X86::EXTRACTPSrr, X86::EXTRACTPSmr, 0, 16 },
- { X86::FsMOVAPDrr, X86::MOVSDmr | TB_NOT_REVERSABLE , 0, 0 },
- { X86::FsMOVAPSrr, X86::MOVSSmr | TB_NOT_REVERSABLE , 0, 0 },
- { X86::IDIV16r, X86::IDIV16m, 1, 0 },
- { X86::IDIV32r, X86::IDIV32m, 1, 0 },
- { X86::IDIV64r, X86::IDIV64m, 1, 0 },
- { X86::IDIV8r, X86::IDIV8m, 1, 0 },
- { X86::IMUL16r, X86::IMUL16m, 1, 0 },
- { X86::IMUL32r, X86::IMUL32m, 1, 0 },
- { X86::IMUL64r, X86::IMUL64m, 1, 0 },
- { X86::IMUL8r, X86::IMUL8m, 1, 0 },
- { X86::JMP32r, X86::JMP32m, 1, 0 },
- { X86::JMP64r, X86::JMP64m, 1, 0 },
- { X86::MOV16ri, X86::MOV16mi, 0, 0 },
- { X86::MOV16rr, X86::MOV16mr, 0, 0 },
- { X86::MOV32ri, X86::MOV32mi, 0, 0 },
- { X86::MOV32rr, X86::MOV32mr, 0, 0 },
- { X86::MOV64ri32, X86::MOV64mi32, 0, 0 },
- { X86::MOV64rr, X86::MOV64mr, 0, 0 },
- { X86::MOV8ri, X86::MOV8mi, 0, 0 },
- { X86::MOV8rr, X86::MOV8mr, 0, 0 },
- { X86::MOV8rr_NOREX, X86::MOV8mr_NOREX, 0, 0 },
- { X86::MOVAPDrr, X86::MOVAPDmr, 0, 16 },
- { X86::MOVAPSrr, X86::MOVAPSmr, 0, 16 },
- { X86::MOVDQArr, X86::MOVDQAmr, 0, 16 },
- { X86::VMOVAPDYrr, X86::VMOVAPDYmr, 0, 32 },
- { X86::VMOVAPSYrr, X86::VMOVAPSYmr, 0, 32 },
- { X86::VMOVDQAYrr, X86::VMOVDQAYmr, 0, 32 },
- { X86::MOVPDI2DIrr, X86::MOVPDI2DImr, 0, 0 },
- { X86::MOVPQIto64rr,X86::MOVPQI2QImr, 0, 0 },
- { X86::MOVSDto64rr, X86::MOVSDto64mr, 0, 0 },
- { X86::MOVSS2DIrr, X86::MOVSS2DImr, 0, 0 },
- { X86::MOVUPDrr, X86::MOVUPDmr, 0, 0 },
- { X86::MOVUPSrr, X86::MOVUPSmr, 0, 0 },
- { X86::VMOVUPDYrr, X86::VMOVUPDYmr, 0, 0 },
- { X86::VMOVUPSYrr, X86::VMOVUPSYmr, 0, 0 },
- { X86::MUL16r, X86::MUL16m, 1, 0 },
- { X86::MUL32r, X86::MUL32m, 1, 0 },
- { X86::MUL64r, X86::MUL64m, 1, 0 },
- { X86::MUL8r, X86::MUL8m, 1, 0 },
- { X86::SETAEr, X86::SETAEm, 0, 0 },
- { X86::SETAr, X86::SETAm, 0, 0 },
- { X86::SETBEr, X86::SETBEm, 0, 0 },
- { X86::SETBr, X86::SETBm, 0, 0 },
- { X86::SETEr, X86::SETEm, 0, 0 },
- { X86::SETGEr, X86::SETGEm, 0, 0 },
- { X86::SETGr, X86::SETGm, 0, 0 },
- { X86::SETLEr, X86::SETLEm, 0, 0 },
- { X86::SETLr, X86::SETLm, 0, 0 },
- { X86::SETNEr, X86::SETNEm, 0, 0 },
- { X86::SETNOr, X86::SETNOm, 0, 0 },
- { X86::SETNPr, X86::SETNPm, 0, 0 },
- { X86::SETNSr, X86::SETNSm, 0, 0 },
- { X86::SETOr, X86::SETOm, 0, 0 },
- { X86::SETPr, X86::SETPm, 0, 0 },
- { X86::SETSr, X86::SETSm, 0, 0 },
- { X86::TAILJMPr, X86::TAILJMPm, 1, 0 },
- { X86::TAILJMPr64, X86::TAILJMPm64, 1, 0 },
- { X86::TEST16ri, X86::TEST16mi, 1, 0 },
- { X86::TEST32ri, X86::TEST32mi, 1, 0 },
- { X86::TEST64ri32, X86::TEST64mi32, 1, 0 },
- { X86::TEST8ri, X86::TEST8mi, 1, 0 }
+ unsigned MemOp = OpTbl2Addr[i][1];
+ unsigned Flags = OpTbl2Addr[i][2];
+ AddTableEntry(RegOp2MemOpTable2Addr, MemOp2RegOpTable,
+ RegOp, MemOp,
+ // Index 0, folded load and store, no alignment requirement.
+ Flags | TB_INDEX_0 | TB_FOLDED_LOAD | TB_FOLDED_STORE);
+ }
+
+ static const unsigned OpTbl0[][3] = {
+ { X86::BT16ri8, X86::BT16mi8, TB_FOLDED_LOAD },
+ { X86::BT32ri8, X86::BT32mi8, TB_FOLDED_LOAD },
+ { X86::BT64ri8, X86::BT64mi8, TB_FOLDED_LOAD },
+ { X86::CALL32r, X86::CALL32m, TB_FOLDED_LOAD },
+ { X86::CALL64r, X86::CALL64m, TB_FOLDED_LOAD },
+ { X86::WINCALL64r, X86::WINCALL64m, TB_FOLDED_LOAD },
+ { X86::CMP16ri, X86::CMP16mi, TB_FOLDED_LOAD },
+ { X86::CMP16ri8, X86::CMP16mi8, TB_FOLDED_LOAD },
+ { X86::CMP16rr, X86::CMP16mr, TB_FOLDED_LOAD },
+ { X86::CMP32ri, X86::CMP32mi, TB_FOLDED_LOAD },
+ { X86::CMP32ri8, X86::CMP32mi8, TB_FOLDED_LOAD },
+ { X86::CMP32rr, X86::CMP32mr, TB_FOLDED_LOAD },
+ { X86::CMP64ri32, X86::CMP64mi32, TB_FOLDED_LOAD },
+ { X86::CMP64ri8, X86::CMP64mi8, TB_FOLDED_LOAD },
+ { X86::CMP64rr, X86::CMP64mr, TB_FOLDED_LOAD },
+ { X86::CMP8ri, X86::CMP8mi, TB_FOLDED_LOAD },
+ { X86::CMP8rr, X86::CMP8mr, TB_FOLDED_LOAD },
+ { X86::DIV16r, X86::DIV16m, TB_FOLDED_LOAD },
+ { X86::DIV32r, X86::DIV32m, TB_FOLDED_LOAD },
+ { X86::DIV64r, X86::DIV64m, TB_FOLDED_LOAD },
+ { X86::DIV8r, X86::DIV8m, TB_FOLDED_LOAD },
+ { X86::EXTRACTPSrr, X86::EXTRACTPSmr, TB_FOLDED_STORE | TB_ALIGN_16 },
+ { X86::FsMOVAPDrr, X86::MOVSDmr, TB_FOLDED_STORE | TB_NO_REVERSE },
+ { X86::FsMOVAPSrr, X86::MOVSSmr, TB_FOLDED_STORE | TB_NO_REVERSE },
+ { X86::IDIV16r, X86::IDIV16m, TB_FOLDED_LOAD },
+ { X86::IDIV32r, X86::IDIV32m, TB_FOLDED_LOAD },
+ { X86::IDIV64r, X86::IDIV64m, TB_FOLDED_LOAD },
+ { X86::IDIV8r, X86::IDIV8m, TB_FOLDED_LOAD },
+ { X86::IMUL16r, X86::IMUL16m, TB_FOLDED_LOAD },
+ { X86::IMUL32r, X86::IMUL32m, TB_FOLDED_LOAD },
+ { X86::IMUL64r, X86::IMUL64m, TB_FOLDED_LOAD },
+ { X86::IMUL8r, X86::IMUL8m, TB_FOLDED_LOAD },
+ { X86::JMP32r, X86::JMP32m, TB_FOLDED_LOAD },
+ { X86::JMP64r, X86::JMP64m, TB_FOLDED_LOAD },
+ { X86::MOV16ri, X86::MOV16mi, TB_FOLDED_STORE },
+ { X86::MOV16rr, X86::MOV16mr, TB_FOLDED_STORE },
+ { X86::MOV32ri, X86::MOV32mi, TB_FOLDED_STORE },
+ { X86::MOV32rr, X86::MOV32mr, TB_FOLDED_STORE },
+ { X86::MOV64ri32, X86::MOV64mi32, TB_FOLDED_STORE },
+ { X86::MOV64rr, X86::MOV64mr, TB_FOLDED_STORE },
+ { X86::MOV8ri, X86::MOV8mi, TB_FOLDED_STORE },
+ { X86::MOV8rr, X86::MOV8mr, TB_FOLDED_STORE },
+ { X86::MOV8rr_NOREX, X86::MOV8mr_NOREX, TB_FOLDED_STORE },
+ { X86::MOVAPDrr, X86::MOVAPDmr, TB_FOLDED_STORE | TB_ALIGN_16 },
+ { X86::MOVAPSrr, X86::MOVAPSmr, TB_FOLDED_STORE | TB_ALIGN_16 },
+ { X86::MOVDQArr, X86::MOVDQAmr, TB_FOLDED_STORE | TB_ALIGN_16 },
+ { X86::MOVPDI2DIrr, X86::MOVPDI2DImr, TB_FOLDED_STORE },
+ { X86::MOVPQIto64rr,X86::MOVPQI2QImr, TB_FOLDED_STORE },
+ { X86::MOVSDto64rr, X86::MOVSDto64mr, TB_FOLDED_STORE },
+ { X86::MOVSS2DIrr, X86::MOVSS2DImr, TB_FOLDED_STORE },
+ { X86::MOVUPDrr, X86::MOVUPDmr, TB_FOLDED_STORE },
+ { X86::MOVUPSrr, X86::MOVUPSmr, TB_FOLDED_STORE },
+ { X86::MUL16r, X86::MUL16m, TB_FOLDED_LOAD },
+ { X86::MUL32r, X86::MUL32m, TB_FOLDED_LOAD },
+ { X86::MUL64r, X86::MUL64m, TB_FOLDED_LOAD },
+ { X86::MUL8r, X86::MUL8m, TB_FOLDED_LOAD },
+ { X86::SETAEr, X86::SETAEm, TB_FOLDED_STORE },
+ { X86::SETAr, X86::SETAm, TB_FOLDED_STORE },
+ { X86::SETBEr, X86::SETBEm, TB_FOLDED_STORE },
+ { X86::SETBr, X86::SETBm, TB_FOLDED_STORE },
+ { X86::SETEr, X86::SETEm, TB_FOLDED_STORE },
+ { X86::SETGEr, X86::SETGEm, TB_FOLDED_STORE },
+ { X86::SETGr, X86::SETGm, TB_FOLDED_STORE },
+ { X86::SETLEr, X86::SETLEm, TB_FOLDED_STORE },
+ { X86::SETLr, X86::SETLm, TB_FOLDED_STORE },
+ { X86::SETNEr, X86::SETNEm, TB_FOLDED_STORE },
+ { X86::SETNOr, X86::SETNOm, TB_FOLDED_STORE },
+ { X86::SETNPr, X86::SETNPm, TB_FOLDED_STORE },
+ { X86::SETNSr, X86::SETNSm, TB_FOLDED_STORE },
+ { X86::SETOr, X86::SETOm, TB_FOLDED_STORE },
+ { X86::SETPr, X86::SETPm, TB_FOLDED_STORE },
+ { X86::SETSr, X86::SETSm, TB_FOLDED_STORE },
+ { X86::TAILJMPr, X86::TAILJMPm, TB_FOLDED_LOAD },
+ { X86::TAILJMPr64, X86::TAILJMPm64, TB_FOLDED_LOAD },
+ { X86::TEST16ri, X86::TEST16mi, TB_FOLDED_LOAD },
+ { X86::TEST32ri, X86::TEST32mi, TB_FOLDED_LOAD },
+ { X86::TEST64ri32, X86::TEST64mi32, TB_FOLDED_LOAD },
+ { X86::TEST8ri, X86::TEST8mi, TB_FOLDED_LOAD },
+ // AVX 128-bit versions of foldable instructions
+ { X86::VEXTRACTPSrr,X86::VEXTRACTPSmr, TB_FOLDED_STORE | TB_ALIGN_16 },
+ { X86::FsVMOVAPDrr, X86::VMOVSDmr, TB_FOLDED_STORE | TB_NO_REVERSE },
+ { X86::FsVMOVAPSrr, X86::VMOVSSmr, TB_FOLDED_STORE | TB_NO_REVERSE },
+ { X86::VMOVAPDrr, X86::VMOVAPDmr, TB_FOLDED_STORE | TB_ALIGN_16 },
+ { X86::VMOVAPSrr, X86::VMOVAPSmr, TB_FOLDED_STORE | TB_ALIGN_16 },
+ { X86::VMOVDQArr, X86::VMOVDQAmr, TB_FOLDED_STORE | TB_ALIGN_16 },
+ { X86::VMOVPDI2DIrr,X86::VMOVPDI2DImr, TB_FOLDED_STORE },
+ { X86::VMOVPQIto64rr, X86::VMOVPQI2QImr,TB_FOLDED_STORE },
+ { X86::VMOVSDto64rr,X86::VMOVSDto64mr, TB_FOLDED_STORE },
+ { X86::VMOVSS2DIrr, X86::VMOVSS2DImr, TB_FOLDED_STORE },
+ { X86::VMOVUPDrr, X86::VMOVUPDmr, TB_FOLDED_STORE },
+ { X86::VMOVUPSrr, X86::VMOVUPSmr, TB_FOLDED_STORE },
+ // AVX 256-bit foldable instructions
+ { X86::VMOVAPDYrr, X86::VMOVAPDYmr, TB_FOLDED_STORE | TB_ALIGN_32 },
+ { X86::VMOVAPSYrr, X86::VMOVAPSYmr, TB_FOLDED_STORE | TB_ALIGN_32 },
+ { X86::VMOVDQAYrr, X86::VMOVDQAYmr, TB_FOLDED_STORE | TB_ALIGN_32 },
+ { X86::VMOVUPDYrr, X86::VMOVUPDYmr, TB_FOLDED_STORE },
+ { X86::VMOVUPSYrr, X86::VMOVUPSYmr, TB_FOLDED_STORE }
};
for (unsigned i = 0, e = array_lengthof(OpTbl0); i != e; ++i) {
unsigned RegOp = OpTbl0[i][0];
- unsigned MemOp = OpTbl0[i][1] & ~TB_FLAGS;
- unsigned FoldedLoad = OpTbl0[i][2];
- unsigned Align = OpTbl0[i][3];
- assert(!RegOp2MemOpTable0.count(RegOp) && "Duplicated entries?");
- RegOp2MemOpTable0[RegOp] = std::make_pair(MemOp, Align);
-
- // If this is not a reversible operation (because there is a many->one)
- // mapping, don't insert the reverse of the operation into MemOp2RegOpTable.
- if (OpTbl0[i][1] & TB_NOT_REVERSABLE)
- continue;
-
- // Index 0, folded load or store.
- unsigned AuxInfo = 0 | (FoldedLoad << 4) | ((FoldedLoad^1) << 5);
- assert(!MemOp2RegOpTable.count(MemOp) && "Duplicated entries?");
- MemOp2RegOpTable[MemOp] = std::make_pair(RegOp, AuxInfo);
+ unsigned MemOp = OpTbl0[i][1];
+ unsigned Flags = OpTbl0[i][2];
+ AddTableEntry(RegOp2MemOpTable0, MemOp2RegOpTable,
+ RegOp, MemOp, TB_INDEX_0 | Flags);
}
static const unsigned OpTbl1[][3] = {
- { X86::CMP16rr, X86::CMP16rm, 0 },
- { X86::CMP32rr, X86::CMP32rm, 0 },
- { X86::CMP64rr, X86::CMP64rm, 0 },
- { X86::CMP8rr, X86::CMP8rm, 0 },
- { X86::CVTSD2SSrr, X86::CVTSD2SSrm, 0 },
- { X86::CVTSI2SD64rr, X86::CVTSI2SD64rm, 0 },
- { X86::CVTSI2SDrr, X86::CVTSI2SDrm, 0 },
- { X86::CVTSI2SS64rr, X86::CVTSI2SS64rm, 0 },
- { X86::CVTSI2SSrr, X86::CVTSI2SSrm, 0 },
- { X86::CVTSS2SDrr, X86::CVTSS2SDrm, 0 },
- { X86::CVTTSD2SI64rr, X86::CVTTSD2SI64rm, 0 },
- { X86::CVTTSD2SIrr, X86::CVTTSD2SIrm, 0 },
- { X86::CVTTSS2SI64rr, X86::CVTTSS2SI64rm, 0 },
- { X86::CVTTSS2SIrr, X86::CVTTSS2SIrm, 0 },
- { X86::FsMOVAPDrr, X86::MOVSDrm | TB_NOT_REVERSABLE , 0 },
- { X86::FsMOVAPSrr, X86::MOVSSrm | TB_NOT_REVERSABLE , 0 },
- { X86::IMUL16rri, X86::IMUL16rmi, 0 },
- { X86::IMUL16rri8, X86::IMUL16rmi8, 0 },
- { X86::IMUL32rri, X86::IMUL32rmi, 0 },
- { X86::IMUL32rri8, X86::IMUL32rmi8, 0 },
- { X86::IMUL64rri32, X86::IMUL64rmi32, 0 },
- { X86::IMUL64rri8, X86::IMUL64rmi8, 0 },
- { X86::Int_COMISDrr, X86::Int_COMISDrm, 0 },
- { X86::Int_COMISSrr, X86::Int_COMISSrm, 0 },
- { X86::Int_CVTDQ2PDrr, X86::Int_CVTDQ2PDrm, 16 },
- { X86::Int_CVTDQ2PSrr, X86::Int_CVTDQ2PSrm, 16 },
- { X86::Int_CVTPD2DQrr, X86::Int_CVTPD2DQrm, 16 },
- { X86::Int_CVTPD2PSrr, X86::Int_CVTPD2PSrm, 16 },
- { X86::Int_CVTPS2DQrr, X86::Int_CVTPS2DQrm, 16 },
- { X86::Int_CVTPS2PDrr, X86::Int_CVTPS2PDrm, 0 },
- { X86::CVTSD2SI64rr, X86::CVTSD2SI64rm, 0 },
- { X86::CVTSD2SIrr, X86::CVTSD2SIrm, 0 },
- { X86::Int_CVTSD2SSrr, X86::Int_CVTSD2SSrm, 0 },
- { X86::Int_CVTSI2SD64rr,X86::Int_CVTSI2SD64rm, 0 },
- { X86::Int_CVTSI2SDrr, X86::Int_CVTSI2SDrm, 0 },
- { X86::Int_CVTSI2SS64rr,X86::Int_CVTSI2SS64rm, 0 },
- { X86::Int_CVTSI2SSrr, X86::Int_CVTSI2SSrm, 0 },
- { X86::Int_CVTSS2SDrr, X86::Int_CVTSS2SDrm, 0 },
- { X86::Int_CVTSS2SI64rr,X86::Int_CVTSS2SI64rm, 0 },
- { X86::Int_CVTSS2SIrr, X86::Int_CVTSS2SIrm, 0 },
- { X86::CVTTPD2DQrr, X86::CVTTPD2DQrm, 16 },
- { X86::CVTTPS2DQrr, X86::CVTTPS2DQrm, 16 },
- { X86::Int_CVTTSD2SI64rr,X86::Int_CVTTSD2SI64rm, 0 },
- { X86::Int_CVTTSD2SIrr, X86::Int_CVTTSD2SIrm, 0 },
- { X86::Int_CVTTSS2SI64rr,X86::Int_CVTTSS2SI64rm, 0 },
- { X86::Int_CVTTSS2SIrr, X86::Int_CVTTSS2SIrm, 0 },
- { X86::Int_UCOMISDrr, X86::Int_UCOMISDrm, 0 },
- { X86::Int_UCOMISSrr, X86::Int_UCOMISSrm, 0 },
- { X86::MOV16rr, X86::MOV16rm, 0 },
- { X86::MOV32rr, X86::MOV32rm, 0 },
- { X86::MOV64rr, X86::MOV64rm, 0 },
- { X86::MOV64toPQIrr, X86::MOVQI2PQIrm, 0 },
- { X86::MOV64toSDrr, X86::MOV64toSDrm, 0 },
- { X86::MOV8rr, X86::MOV8rm, 0 },
- { X86::MOVAPDrr, X86::MOVAPDrm, 16 },
- { X86::MOVAPSrr, X86::MOVAPSrm, 16 },
- { X86::VMOVAPDYrr, X86::VMOVAPDYrm, 32 },
- { X86::VMOVAPSYrr, X86::VMOVAPSYrm, 32 },
- { X86::MOVDDUPrr, X86::MOVDDUPrm, 0 },
- { X86::MOVDI2PDIrr, X86::MOVDI2PDIrm, 0 },
- { X86::MOVDI2SSrr, X86::MOVDI2SSrm, 0 },
- { X86::MOVDQArr, X86::MOVDQArm, 16 },
- { X86::VMOVDQAYrr, X86::VMOVDQAYrm, 16 },
- { X86::MOVSHDUPrr, X86::MOVSHDUPrm, 16 },
- { X86::MOVSLDUPrr, X86::MOVSLDUPrm, 16 },
- { X86::MOVSX16rr8, X86::MOVSX16rm8, 0 },
- { X86::MOVSX32rr16, X86::MOVSX32rm16, 0 },
- { X86::MOVSX32rr8, X86::MOVSX32rm8, 0 },
- { X86::MOVSX64rr16, X86::MOVSX64rm16, 0 },
- { X86::MOVSX64rr32, X86::MOVSX64rm32, 0 },
- { X86::MOVSX64rr8, X86::MOVSX64rm8, 0 },
- { X86::MOVUPDrr, X86::MOVUPDrm, 16 },
- { X86::MOVUPSrr, X86::MOVUPSrm, 0 },
- { X86::VMOVUPDYrr, X86::VMOVUPDYrm, 0 },
- { X86::VMOVUPSYrr, X86::VMOVUPSYrm, 0 },
- { X86::MOVZDI2PDIrr, X86::MOVZDI2PDIrm, 0 },
- { X86::MOVZQI2PQIrr, X86::MOVZQI2PQIrm, 0 },
- { X86::MOVZPQILo2PQIrr, X86::MOVZPQILo2PQIrm, 16 },
- { X86::MOVZX16rr8, X86::MOVZX16rm8, 0 },
- { X86::MOVZX32rr16, X86::MOVZX32rm16, 0 },
- { X86::MOVZX32_NOREXrr8, X86::MOVZX32_NOREXrm8, 0 },
- { X86::MOVZX32rr8, X86::MOVZX32rm8, 0 },
- { X86::MOVZX64rr16, X86::MOVZX64rm16, 0 },
- { X86::MOVZX64rr32, X86::MOVZX64rm32, 0 },
- { X86::MOVZX64rr8, X86::MOVZX64rm8, 0 },
- { X86::PSHUFDri, X86::PSHUFDmi, 16 },
- { X86::PSHUFHWri, X86::PSHUFHWmi, 16 },
- { X86::PSHUFLWri, X86::PSHUFLWmi, 16 },
- { X86::RCPPSr, X86::RCPPSm, 16 },
- { X86::RCPPSr_Int, X86::RCPPSm_Int, 16 },
- { X86::RSQRTPSr, X86::RSQRTPSm, 16 },
- { X86::RSQRTPSr_Int, X86::RSQRTPSm_Int, 16 },
- { X86::RSQRTSSr, X86::RSQRTSSm, 0 },
- { X86::RSQRTSSr_Int, X86::RSQRTSSm_Int, 0 },
- { X86::SQRTPDr, X86::SQRTPDm, 16 },
- { X86::SQRTPDr_Int, X86::SQRTPDm_Int, 16 },
- { X86::SQRTPSr, X86::SQRTPSm, 16 },
- { X86::SQRTPSr_Int, X86::SQRTPSm_Int, 16 },
- { X86::SQRTSDr, X86::SQRTSDm, 0 },
- { X86::SQRTSDr_Int, X86::SQRTSDm_Int, 0 },
- { X86::SQRTSSr, X86::SQRTSSm, 0 },
- { X86::SQRTSSr_Int, X86::SQRTSSm_Int, 0 },
- { X86::TEST16rr, X86::TEST16rm, 0 },
- { X86::TEST32rr, X86::TEST32rm, 0 },
- { X86::TEST64rr, X86::TEST64rm, 0 },
- { X86::TEST8rr, X86::TEST8rm, 0 },
+ { X86::CMP16rr, X86::CMP16rm, 0 },
+ { X86::CMP32rr, X86::CMP32rm, 0 },
+ { X86::CMP64rr, X86::CMP64rm, 0 },
+ { X86::CMP8rr, X86::CMP8rm, 0 },
+ { X86::CVTSD2SSrr, X86::CVTSD2SSrm, 0 },
+ { X86::CVTSI2SD64rr, X86::CVTSI2SD64rm, 0 },
+ { X86::CVTSI2SDrr, X86::CVTSI2SDrm, 0 },
+ { X86::CVTSI2SS64rr, X86::CVTSI2SS64rm, 0 },
+ { X86::CVTSI2SSrr, X86::CVTSI2SSrm, 0 },
+ { X86::CVTSS2SDrr, X86::CVTSS2SDrm, 0 },
+ { X86::CVTTSD2SI64rr, X86::CVTTSD2SI64rm, 0 },
+ { X86::CVTTSD2SIrr, X86::CVTTSD2SIrm, 0 },
+ { X86::CVTTSS2SI64rr, X86::CVTTSS2SI64rm, 0 },
+ { X86::CVTTSS2SIrr, X86::CVTTSS2SIrm, 0 },
+ { X86::FsMOVAPDrr, X86::MOVSDrm, TB_NO_REVERSE },
+ { X86::FsMOVAPSrr, X86::MOVSSrm, TB_NO_REVERSE },
+ { X86::IMUL16rri, X86::IMUL16rmi, 0 },
+ { X86::IMUL16rri8, X86::IMUL16rmi8, 0 },
+ { X86::IMUL32rri, X86::IMUL32rmi, 0 },
+ { X86::IMUL32rri8, X86::IMUL32rmi8, 0 },
+ { X86::IMUL64rri32, X86::IMUL64rmi32, 0 },
+ { X86::IMUL64rri8, X86::IMUL64rmi8, 0 },
+ { X86::Int_COMISDrr, X86::Int_COMISDrm, 0 },
+ { X86::Int_COMISSrr, X86::Int_COMISSrm, 0 },
+ { X86::Int_CVTDQ2PDrr, X86::Int_CVTDQ2PDrm, TB_ALIGN_16 },
+ { X86::Int_CVTDQ2PSrr, X86::Int_CVTDQ2PSrm, TB_ALIGN_16 },
+ { X86::Int_CVTPD2DQrr, X86::Int_CVTPD2DQrm, TB_ALIGN_16 },
+ { X86::Int_CVTPD2PSrr, X86::Int_CVTPD2PSrm, TB_ALIGN_16 },
+ { X86::Int_CVTPS2DQrr, X86::Int_CVTPS2DQrm, TB_ALIGN_16 },
+ { X86::Int_CVTPS2PDrr, X86::Int_CVTPS2PDrm, 0 },
+ { X86::CVTSD2SI64rr, X86::CVTSD2SI64rm, 0 },
+ { X86::CVTSD2SIrr, X86::CVTSD2SIrm, 0 },
+ { X86::Int_CVTSD2SSrr, X86::Int_CVTSD2SSrm, 0 },
+ { X86::Int_CVTSI2SD64rr,X86::Int_CVTSI2SD64rm, 0 },
+ { X86::Int_CVTSI2SDrr, X86::Int_CVTSI2SDrm, 0 },
+ { X86::Int_CVTSI2SS64rr,X86::Int_CVTSI2SS64rm, 0 },
+ { X86::Int_CVTSI2SSrr, X86::Int_CVTSI2SSrm, 0 },
+ { X86::Int_CVTSS2SDrr, X86::Int_CVTSS2SDrm, 0 },
+ { X86::CVTTPD2DQrr, X86::CVTTPD2DQrm, TB_ALIGN_16 },
+ { X86::CVTTPS2DQrr, X86::CVTTPS2DQrm, TB_ALIGN_16 },
+ { X86::Int_CVTTSD2SI64rr,X86::Int_CVTTSD2SI64rm, 0 },
+ { X86::Int_CVTTSD2SIrr, X86::Int_CVTTSD2SIrm, 0 },
+ { X86::Int_CVTTSS2SI64rr,X86::Int_CVTTSS2SI64rm, 0 },
+ { X86::Int_CVTTSS2SIrr, X86::Int_CVTTSS2SIrm, 0 },
+ { X86::Int_UCOMISDrr, X86::Int_UCOMISDrm, 0 },
+ { X86::Int_UCOMISSrr, X86::Int_UCOMISSrm, 0 },
+ { X86::MOV16rr, X86::MOV16rm, 0 },
+ { X86::MOV32rr, X86::MOV32rm, 0 },
+ { X86::MOV64rr, X86::MOV64rm, 0 },
+ { X86::MOV64toPQIrr, X86::MOVQI2PQIrm, 0 },
+ { X86::MOV64toSDrr, X86::MOV64toSDrm, 0 },
+ { X86::MOV8rr, X86::MOV8rm, 0 },
+ { X86::MOVAPDrr, X86::MOVAPDrm, TB_ALIGN_16 },
+ { X86::MOVAPSrr, X86::MOVAPSrm, TB_ALIGN_16 },
+ { X86::MOVDDUPrr, X86::MOVDDUPrm, 0 },
+ { X86::MOVDI2PDIrr, X86::MOVDI2PDIrm, 0 },
+ { X86::MOVDI2SSrr, X86::MOVDI2SSrm, 0 },
+ { X86::MOVDQArr, X86::MOVDQArm, TB_ALIGN_16 },
+ { X86::MOVSHDUPrr, X86::MOVSHDUPrm, TB_ALIGN_16 },
+ { X86::MOVSLDUPrr, X86::MOVSLDUPrm, TB_ALIGN_16 },
+ { X86::MOVSX16rr8, X86::MOVSX16rm8, 0 },
+ { X86::MOVSX32rr16, X86::MOVSX32rm16, 0 },
+ { X86::MOVSX32rr8, X86::MOVSX32rm8, 0 },
+ { X86::MOVSX64rr16, X86::MOVSX64rm16, 0 },
+ { X86::MOVSX64rr32, X86::MOVSX64rm32, 0 },
+ { X86::MOVSX64rr8, X86::MOVSX64rm8, 0 },
+ { X86::MOVUPDrr, X86::MOVUPDrm, TB_ALIGN_16 },
+ { X86::MOVUPSrr, X86::MOVUPSrm, 0 },
+ { X86::MOVZDI2PDIrr, X86::MOVZDI2PDIrm, 0 },
+ { X86::MOVZQI2PQIrr, X86::MOVZQI2PQIrm, 0 },
+ { X86::MOVZPQILo2PQIrr, X86::MOVZPQILo2PQIrm, TB_ALIGN_16 },
+ { X86::MOVZX16rr8, X86::MOVZX16rm8, 0 },
+ { X86::MOVZX32rr16, X86::MOVZX32rm16, 0 },
+ { X86::MOVZX32_NOREXrr8, X86::MOVZX32_NOREXrm8, 0 },
+ { X86::MOVZX32rr8, X86::MOVZX32rm8, 0 },
+ { X86::MOVZX64rr16, X86::MOVZX64rm16, 0 },
+ { X86::MOVZX64rr32, X86::MOVZX64rm32, 0 },
+ { X86::MOVZX64rr8, X86::MOVZX64rm8, 0 },
+ { X86::PSHUFDri, X86::PSHUFDmi, TB_ALIGN_16 },
+ { X86::PSHUFHWri, X86::PSHUFHWmi, TB_ALIGN_16 },
+ { X86::PSHUFLWri, X86::PSHUFLWmi, TB_ALIGN_16 },
+ { X86::RCPPSr, X86::RCPPSm, TB_ALIGN_16 },
+ { X86::RCPPSr_Int, X86::RCPPSm_Int, TB_ALIGN_16 },
+ { X86::RSQRTPSr, X86::RSQRTPSm, TB_ALIGN_16 },
+ { X86::RSQRTPSr_Int, X86::RSQRTPSm_Int, TB_ALIGN_16 },
+ { X86::RSQRTSSr, X86::RSQRTSSm, 0 },
+ { X86::RSQRTSSr_Int, X86::RSQRTSSm_Int, 0 },
+ { X86::SQRTPDr, X86::SQRTPDm, TB_ALIGN_16 },
+ { X86::SQRTPDr_Int, X86::SQRTPDm_Int, TB_ALIGN_16 },
+ { X86::SQRTPSr, X86::SQRTPSm, TB_ALIGN_16 },
+ { X86::SQRTPSr_Int, X86::SQRTPSm_Int, TB_ALIGN_16 },
+ { X86::SQRTSDr, X86::SQRTSDm, 0 },
+ { X86::SQRTSDr_Int, X86::SQRTSDm_Int, 0 },
+ { X86::SQRTSSr, X86::SQRTSSm, 0 },
+ { X86::SQRTSSr_Int, X86::SQRTSSm_Int, 0 },
+ { X86::TEST16rr, X86::TEST16rm, 0 },
+ { X86::TEST32rr, X86::TEST32rm, 0 },
+ { X86::TEST64rr, X86::TEST64rm, 0 },
+ { X86::TEST8rr, X86::TEST8rm, 0 },
// FIXME: TEST*rr EAX,EAX ---> CMP [mem], 0
- { X86::UCOMISDrr, X86::UCOMISDrm, 0 },
- { X86::UCOMISSrr, X86::UCOMISSrm, 0 }
+ { X86::UCOMISDrr, X86::UCOMISDrm, 0 },
+ { X86::UCOMISSrr, X86::UCOMISSrm, 0 },
+ // AVX 128-bit versions of foldable instructions
+ { X86::Int_VCOMISDrr, X86::Int_VCOMISDrm, 0 },
+ { X86::Int_VCOMISSrr, X86::Int_VCOMISSrm, 0 },
+ { X86::Int_VCVTDQ2PDrr, X86::Int_VCVTDQ2PDrm, TB_ALIGN_16 },
+ { X86::Int_VCVTDQ2PSrr, X86::Int_VCVTDQ2PSrm, TB_ALIGN_16 },
+ { X86::Int_VCVTPD2DQrr, X86::Int_VCVTPD2DQrm, TB_ALIGN_16 },
+ { X86::Int_VCVTPD2PSrr, X86::Int_VCVTPD2PSrm, TB_ALIGN_16 },
+ { X86::Int_VCVTPS2DQrr, X86::Int_VCVTPS2DQrm, TB_ALIGN_16 },
+ { X86::Int_VCVTPS2PDrr, X86::Int_VCVTPS2PDrm, 0 },
+ { X86::Int_VUCOMISDrr, X86::Int_VUCOMISDrm, 0 },
+ { X86::Int_VUCOMISSrr, X86::Int_VUCOMISSrm, 0 },
+ { X86::FsVMOVAPDrr, X86::VMOVSDrm, TB_NO_REVERSE },
+ { X86::FsVMOVAPSrr, X86::VMOVSSrm, TB_NO_REVERSE },
+ { X86::VMOV64toPQIrr, X86::VMOVQI2PQIrm, 0 },
+ { X86::VMOV64toSDrr, X86::VMOV64toSDrm, 0 },
+ { X86::VMOVAPDrr, X86::VMOVAPDrm, TB_ALIGN_16 },
+ { X86::VMOVAPSrr, X86::VMOVAPSrm, TB_ALIGN_16 },
+ { X86::VMOVDDUPrr, X86::VMOVDDUPrm, 0 },
+ { X86::VMOVDI2PDIrr, X86::VMOVDI2PDIrm, 0 },
+ { X86::VMOVDI2SSrr, X86::VMOVDI2SSrm, 0 },
+ { X86::VMOVDQArr, X86::VMOVDQArm, TB_ALIGN_16 },
+ { X86::VMOVSLDUPrr, X86::VMOVSLDUPrm, TB_ALIGN_16 },
+ { X86::VMOVSHDUPrr, X86::VMOVSHDUPrm, TB_ALIGN_16 },
+ { X86::VMOVUPDrr, X86::VMOVUPDrm, TB_ALIGN_16 },
+ { X86::VMOVUPSrr, X86::VMOVUPSrm, 0 },
+ { X86::VMOVZDI2PDIrr, X86::VMOVZDI2PDIrm, 0 },
+ { X86::VMOVZQI2PQIrr, X86::VMOVZQI2PQIrm, 0 },
+ { X86::VMOVZPQILo2PQIrr,X86::VMOVZPQILo2PQIrm, TB_ALIGN_16 },
+ { X86::VPSHUFDri, X86::VPSHUFDmi, TB_ALIGN_16 },
+ { X86::VPSHUFHWri, X86::VPSHUFHWmi, TB_ALIGN_16 },
+ { X86::VPSHUFLWri, X86::VPSHUFLWmi, TB_ALIGN_16 },
+ { X86::VRCPPSr, X86::VRCPPSm, TB_ALIGN_16 },
+ { X86::VRCPPSr_Int, X86::VRCPPSm_Int, TB_ALIGN_16 },
+ { X86::VRSQRTPSr, X86::VRSQRTPSm, TB_ALIGN_16 },
+ { X86::VRSQRTPSr_Int, X86::VRSQRTPSm_Int, TB_ALIGN_16 },
+ { X86::VSQRTPDr, X86::VSQRTPDm, TB_ALIGN_16 },
+ { X86::VSQRTPDr_Int, X86::VSQRTPDm_Int, TB_ALIGN_16 },
+ { X86::VSQRTPSr, X86::VSQRTPSm, TB_ALIGN_16 },
+ { X86::VSQRTPSr_Int, X86::VSQRTPSm_Int, TB_ALIGN_16 },
+ { X86::VUCOMISDrr, X86::VUCOMISDrm, 0 },
+ { X86::VUCOMISSrr, X86::VUCOMISSrm, 0 },
+ // AVX 256-bit foldable instructions
+ { X86::VMOVAPDYrr, X86::VMOVAPDYrm, TB_ALIGN_32 },
+ { X86::VMOVAPSYrr, X86::VMOVAPSYrm, TB_ALIGN_32 },
+ { X86::VMOVDQAYrr, X86::VMOVDQAYrm, TB_ALIGN_16 },
+ { X86::VMOVUPDYrr, X86::VMOVUPDYrm, 0 },
+ { X86::VMOVUPSYrr, X86::VMOVUPSYrm, 0 }
};
for (unsigned i = 0, e = array_lengthof(OpTbl1); i != e; ++i) {
unsigned RegOp = OpTbl1[i][0];
- unsigned MemOp = OpTbl1[i][1] & ~TB_FLAGS;
- unsigned Align = OpTbl1[i][2];
- assert(!RegOp2MemOpTable1.count(RegOp) && "Duplicate entries");
- RegOp2MemOpTable1[RegOp] = std::make_pair(MemOp, Align);
-
- // If this is not a reversible operation (because there is a many->one)
- // mapping, don't insert the reverse of the operation into MemOp2RegOpTable.
- if (OpTbl1[i][1] & TB_NOT_REVERSABLE)
- continue;
-
- // Index 1, folded load
- unsigned AuxInfo = 1 | (1 << 4);
- assert(!MemOp2RegOpTable.count(MemOp) && "Duplicate entries");
- MemOp2RegOpTable[MemOp] = std::make_pair(RegOp, AuxInfo);
+ unsigned MemOp = OpTbl1[i][1];
+ unsigned Flags = OpTbl1[i][2];
+ AddTableEntry(RegOp2MemOpTable1, MemOp2RegOpTable,
+ RegOp, MemOp,
+ // Index 1, folded load
+ Flags | TB_INDEX_1 | TB_FOLDED_LOAD);
}
static const unsigned OpTbl2[][3] = {
- { X86::ADC32rr, X86::ADC32rm, 0 },
- { X86::ADC64rr, X86::ADC64rm, 0 },
- { X86::ADD16rr, X86::ADD16rm, 0 },
- { X86::ADD16rr_DB, X86::ADD16rm | TB_NOT_REVERSABLE, 0 },
- { X86::ADD32rr, X86::ADD32rm, 0 },
- { X86::ADD32rr_DB, X86::ADD32rm | TB_NOT_REVERSABLE, 0 },
- { X86::ADD64rr, X86::ADD64rm, 0 },
- { X86::ADD64rr_DB, X86::ADD64rm | TB_NOT_REVERSABLE, 0 },
- { X86::ADD8rr, X86::ADD8rm, 0 },
- { X86::ADDPDrr, X86::ADDPDrm, 16 },
- { X86::ADDPSrr, X86::ADDPSrm, 16 },
- { X86::ADDSDrr, X86::ADDSDrm, 0 },
- { X86::ADDSSrr, X86::ADDSSrm, 0 },
- { X86::ADDSUBPDrr, X86::ADDSUBPDrm, 16 },
- { X86::ADDSUBPSrr, X86::ADDSUBPSrm, 16 },
- { X86::AND16rr, X86::AND16rm, 0 },
- { X86::AND32rr, X86::AND32rm, 0 },
- { X86::AND64rr, X86::AND64rm, 0 },
- { X86::AND8rr, X86::AND8rm, 0 },
- { X86::ANDNPDrr, X86::ANDNPDrm, 16 },
- { X86::ANDNPSrr, X86::ANDNPSrm, 16 },
- { X86::ANDPDrr, X86::ANDPDrm, 16 },
- { X86::ANDPSrr, X86::ANDPSrm, 16 },
- { X86::CMOVA16rr, X86::CMOVA16rm, 0 },
- { X86::CMOVA32rr, X86::CMOVA32rm, 0 },
- { X86::CMOVA64rr, X86::CMOVA64rm, 0 },
- { X86::CMOVAE16rr, X86::CMOVAE16rm, 0 },
- { X86::CMOVAE32rr, X86::CMOVAE32rm, 0 },
- { X86::CMOVAE64rr, X86::CMOVAE64rm, 0 },
- { X86::CMOVB16rr, X86::CMOVB16rm, 0 },
- { X86::CMOVB32rr, X86::CMOVB32rm, 0 },
- { X86::CMOVB64rr, X86::CMOVB64rm, 0 },
- { X86::CMOVBE16rr, X86::CMOVBE16rm, 0 },
- { X86::CMOVBE32rr, X86::CMOVBE32rm, 0 },
- { X86::CMOVBE64rr, X86::CMOVBE64rm, 0 },
- { X86::CMOVE16rr, X86::CMOVE16rm, 0 },
- { X86::CMOVE32rr, X86::CMOVE32rm, 0 },
- { X86::CMOVE64rr, X86::CMOVE64rm, 0 },
- { X86::CMOVG16rr, X86::CMOVG16rm, 0 },
- { X86::CMOVG32rr, X86::CMOVG32rm, 0 },
- { X86::CMOVG64rr, X86::CMOVG64rm, 0 },
- { X86::CMOVGE16rr, X86::CMOVGE16rm, 0 },
- { X86::CMOVGE32rr, X86::CMOVGE32rm, 0 },
- { X86::CMOVGE64rr, X86::CMOVGE64rm, 0 },
- { X86::CMOVL16rr, X86::CMOVL16rm, 0 },
- { X86::CMOVL32rr, X86::CMOVL32rm, 0 },
- { X86::CMOVL64rr, X86::CMOVL64rm, 0 },
- { X86::CMOVLE16rr, X86::CMOVLE16rm, 0 },
- { X86::CMOVLE32rr, X86::CMOVLE32rm, 0 },
- { X86::CMOVLE64rr, X86::CMOVLE64rm, 0 },
- { X86::CMOVNE16rr, X86::CMOVNE16rm, 0 },
- { X86::CMOVNE32rr, X86::CMOVNE32rm, 0 },
- { X86::CMOVNE64rr, X86::CMOVNE64rm, 0 },
- { X86::CMOVNO16rr, X86::CMOVNO16rm, 0 },
- { X86::CMOVNO32rr, X86::CMOVNO32rm, 0 },
- { X86::CMOVNO64rr, X86::CMOVNO64rm, 0 },
- { X86::CMOVNP16rr, X86::CMOVNP16rm, 0 },
- { X86::CMOVNP32rr, X86::CMOVNP32rm, 0 },
- { X86::CMOVNP64rr, X86::CMOVNP64rm, 0 },
- { X86::CMOVNS16rr, X86::CMOVNS16rm, 0 },
- { X86::CMOVNS32rr, X86::CMOVNS32rm, 0 },
- { X86::CMOVNS64rr, X86::CMOVNS64rm, 0 },
- { X86::CMOVO16rr, X86::CMOVO16rm, 0 },
- { X86::CMOVO32rr, X86::CMOVO32rm, 0 },
- { X86::CMOVO64rr, X86::CMOVO64rm, 0 },
- { X86::CMOVP16rr, X86::CMOVP16rm, 0 },
- { X86::CMOVP32rr, X86::CMOVP32rm, 0 },
- { X86::CMOVP64rr, X86::CMOVP64rm, 0 },
- { X86::CMOVS16rr, X86::CMOVS16rm, 0 },
- { X86::CMOVS32rr, X86::CMOVS32rm, 0 },
- { X86::CMOVS64rr, X86::CMOVS64rm, 0 },
- { X86::CMPPDrri, X86::CMPPDrmi, 16 },
- { X86::CMPPSrri, X86::CMPPSrmi, 16 },
- { X86::CMPSDrr, X86::CMPSDrm, 0 },
- { X86::CMPSSrr, X86::CMPSSrm, 0 },
- { X86::DIVPDrr, X86::DIVPDrm, 16 },
- { X86::DIVPSrr, X86::DIVPSrm, 16 },
- { X86::DIVSDrr, X86::DIVSDrm, 0 },
- { X86::DIVSSrr, X86::DIVSSrm, 0 },
- { X86::FsANDNPDrr, X86::FsANDNPDrm, 16 },
- { X86::FsANDNPSrr, X86::FsANDNPSrm, 16 },
- { X86::FsANDPDrr, X86::FsANDPDrm, 16 },
- { X86::FsANDPSrr, X86::FsANDPSrm, 16 },
- { X86::FsORPDrr, X86::FsORPDrm, 16 },
- { X86::FsORPSrr, X86::FsORPSrm, 16 },
- { X86::FsXORPDrr, X86::FsXORPDrm, 16 },
- { X86::FsXORPSrr, X86::FsXORPSrm, 16 },
- { X86::HADDPDrr, X86::HADDPDrm, 16 },
- { X86::HADDPSrr, X86::HADDPSrm, 16 },
- { X86::HSUBPDrr, X86::HSUBPDrm, 16 },
- { X86::HSUBPSrr, X86::HSUBPSrm, 16 },
- { X86::IMUL16rr, X86::IMUL16rm, 0 },
- { X86::IMUL32rr, X86::IMUL32rm, 0 },
- { X86::IMUL64rr, X86::IMUL64rm, 0 },
- { X86::Int_CMPSDrr, X86::Int_CMPSDrm, 0 },
- { X86::Int_CMPSSrr, X86::Int_CMPSSrm, 0 },
- { X86::MAXPDrr, X86::MAXPDrm, 16 },
- { X86::MAXPDrr_Int, X86::MAXPDrm_Int, 16 },
- { X86::MAXPSrr, X86::MAXPSrm, 16 },
- { X86::MAXPSrr_Int, X86::MAXPSrm_Int, 16 },
- { X86::MAXSDrr, X86::MAXSDrm, 0 },
- { X86::MAXSDrr_Int, X86::MAXSDrm_Int, 0 },
- { X86::MAXSSrr, X86::MAXSSrm, 0 },
- { X86::MAXSSrr_Int, X86::MAXSSrm_Int, 0 },
- { X86::MINPDrr, X86::MINPDrm, 16 },
- { X86::MINPDrr_Int, X86::MINPDrm_Int, 16 },
- { X86::MINPSrr, X86::MINPSrm, 16 },
- { X86::MINPSrr_Int, X86::MINPSrm_Int, 16 },
- { X86::MINSDrr, X86::MINSDrm, 0 },
- { X86::MINSDrr_Int, X86::MINSDrm_Int, 0 },
- { X86::MINSSrr, X86::MINSSrm, 0 },
- { X86::MINSSrr_Int, X86::MINSSrm_Int, 0 },
- { X86::MULPDrr, X86::MULPDrm, 16 },
- { X86::MULPSrr, X86::MULPSrm, 16 },
- { X86::MULSDrr, X86::MULSDrm, 0 },
- { X86::MULSSrr, X86::MULSSrm, 0 },
- { X86::OR16rr, X86::OR16rm, 0 },
- { X86::OR32rr, X86::OR32rm, 0 },
- { X86::OR64rr, X86::OR64rm, 0 },
- { X86::OR8rr, X86::OR8rm, 0 },
- { X86::ORPDrr, X86::ORPDrm, 16 },
- { X86::ORPSrr, X86::ORPSrm, 16 },
- { X86::PACKSSDWrr, X86::PACKSSDWrm, 16 },
- { X86::PACKSSWBrr, X86::PACKSSWBrm, 16 },
- { X86::PACKUSWBrr, X86::PACKUSWBrm, 16 },
- { X86::PADDBrr, X86::PADDBrm, 16 },
- { X86::PADDDrr, X86::PADDDrm, 16 },
- { X86::PADDQrr, X86::PADDQrm, 16 },
- { X86::PADDSBrr, X86::PADDSBrm, 16 },
- { X86::PADDSWrr, X86::PADDSWrm, 16 },
- { X86::PADDWrr, X86::PADDWrm, 16 },
- { X86::PANDNrr, X86::PANDNrm, 16 },
- { X86::PANDrr, X86::PANDrm, 16 },
- { X86::PAVGBrr, X86::PAVGBrm, 16 },
- { X86::PAVGWrr, X86::PAVGWrm, 16 },
- { X86::PCMPEQBrr, X86::PCMPEQBrm, 16 },
- { X86::PCMPEQDrr, X86::PCMPEQDrm, 16 },
- { X86::PCMPEQWrr, X86::PCMPEQWrm, 16 },
- { X86::PCMPGTBrr, X86::PCMPGTBrm, 16 },
- { X86::PCMPGTDrr, X86::PCMPGTDrm, 16 },
- { X86::PCMPGTWrr, X86::PCMPGTWrm, 16 },
- { X86::PINSRWrri, X86::PINSRWrmi, 16 },
- { X86::PMADDWDrr, X86::PMADDWDrm, 16 },
- { X86::PMAXSWrr, X86::PMAXSWrm, 16 },
- { X86::PMAXUBrr, X86::PMAXUBrm, 16 },
- { X86::PMINSWrr, X86::PMINSWrm, 16 },
- { X86::PMINUBrr, X86::PMINUBrm, 16 },
- { X86::PMULDQrr, X86::PMULDQrm, 16 },
- { X86::PMULHUWrr, X86::PMULHUWrm, 16 },
- { X86::PMULHWrr, X86::PMULHWrm, 16 },
- { X86::PMULLDrr, X86::PMULLDrm, 16 },
- { X86::PMULLWrr, X86::PMULLWrm, 16 },
- { X86::PMULUDQrr, X86::PMULUDQrm, 16 },
- { X86::PORrr, X86::PORrm, 16 },
- { X86::PSADBWrr, X86::PSADBWrm, 16 },
- { X86::PSLLDrr, X86::PSLLDrm, 16 },
- { X86::PSLLQrr, X86::PSLLQrm, 16 },
- { X86::PSLLWrr, X86::PSLLWrm, 16 },
- { X86::PSRADrr, X86::PSRADrm, 16 },
- { X86::PSRAWrr, X86::PSRAWrm, 16 },
- { X86::PSRLDrr, X86::PSRLDrm, 16 },
- { X86::PSRLQrr, X86::PSRLQrm, 16 },
- { X86::PSRLWrr, X86::PSRLWrm, 16 },
- { X86::PSUBBrr, X86::PSUBBrm, 16 },
- { X86::PSUBDrr, X86::PSUBDrm, 16 },
- { X86::PSUBSBrr, X86::PSUBSBrm, 16 },
- { X86::PSUBSWrr, X86::PSUBSWrm, 16 },
- { X86::PSUBWrr, X86::PSUBWrm, 16 },
- { X86::PUNPCKHBWrr, X86::PUNPCKHBWrm, 16 },
- { X86::PUNPCKHDQrr, X86::PUNPCKHDQrm, 16 },
- { X86::PUNPCKHQDQrr, X86::PUNPCKHQDQrm, 16 },
- { X86::PUNPCKHWDrr, X86::PUNPCKHWDrm, 16 },
- { X86::PUNPCKLBWrr, X86::PUNPCKLBWrm, 16 },
- { X86::PUNPCKLDQrr, X86::PUNPCKLDQrm, 16 },
- { X86::PUNPCKLQDQrr, X86::PUNPCKLQDQrm, 16 },
- { X86::PUNPCKLWDrr, X86::PUNPCKLWDrm, 16 },
- { X86::PXORrr, X86::PXORrm, 16 },
- { X86::SBB32rr, X86::SBB32rm, 0 },
- { X86::SBB64rr, X86::SBB64rm, 0 },
- { X86::SHUFPDrri, X86::SHUFPDrmi, 16 },
- { X86::SHUFPSrri, X86::SHUFPSrmi, 16 },
- { X86::SUB16rr, X86::SUB16rm, 0 },
- { X86::SUB32rr, X86::SUB32rm, 0 },
- { X86::SUB64rr, X86::SUB64rm, 0 },
- { X86::SUB8rr, X86::SUB8rm, 0 },
- { X86::SUBPDrr, X86::SUBPDrm, 16 },
- { X86::SUBPSrr, X86::SUBPSrm, 16 },
- { X86::SUBSDrr, X86::SUBSDrm, 0 },
- { X86::SUBSSrr, X86::SUBSSrm, 0 },
+ { X86::ADC32rr, X86::ADC32rm, 0 },
+ { X86::ADC64rr, X86::ADC64rm, 0 },
+ { X86::ADD16rr, X86::ADD16rm, 0 },
+ { X86::ADD16rr_DB, X86::ADD16rm, TB_NO_REVERSE },
+ { X86::ADD32rr, X86::ADD32rm, 0 },
+ { X86::ADD32rr_DB, X86::ADD32rm, TB_NO_REVERSE },
+ { X86::ADD64rr, X86::ADD64rm, 0 },
+ { X86::ADD64rr_DB, X86::ADD64rm, TB_NO_REVERSE },
+ { X86::ADD8rr, X86::ADD8rm, 0 },
+ { X86::ADDPDrr, X86::ADDPDrm, TB_ALIGN_16 },
+ { X86::ADDPSrr, X86::ADDPSrm, TB_ALIGN_16 },
+ { X86::ADDSDrr, X86::ADDSDrm, 0 },
+ { X86::ADDSSrr, X86::ADDSSrm, 0 },
+ { X86::ADDSUBPDrr, X86::ADDSUBPDrm, TB_ALIGN_16 },
+ { X86::ADDSUBPSrr, X86::ADDSUBPSrm, TB_ALIGN_16 },
+ { X86::AND16rr, X86::AND16rm, 0 },
+ { X86::AND32rr, X86::AND32rm, 0 },
+ { X86::AND64rr, X86::AND64rm, 0 },
+ { X86::AND8rr, X86::AND8rm, 0 },
+ { X86::ANDNPDrr, X86::ANDNPDrm, TB_ALIGN_16 },
+ { X86::ANDNPSrr, X86::ANDNPSrm, TB_ALIGN_16 },
+ { X86::ANDPDrr, X86::ANDPDrm, TB_ALIGN_16 },
+ { X86::ANDPSrr, X86::ANDPSrm, TB_ALIGN_16 },
+ { X86::CMOVA16rr, X86::CMOVA16rm, 0 },
+ { X86::CMOVA32rr, X86::CMOVA32rm, 0 },
+ { X86::CMOVA64rr, X86::CMOVA64rm, 0 },
+ { X86::CMOVAE16rr, X86::CMOVAE16rm, 0 },
+ { X86::CMOVAE32rr, X86::CMOVAE32rm, 0 },
+ { X86::CMOVAE64rr, X86::CMOVAE64rm, 0 },
+ { X86::CMOVB16rr, X86::CMOVB16rm, 0 },
+ { X86::CMOVB32rr, X86::CMOVB32rm, 0 },
+ { X86::CMOVB64rr, X86::CMOVB64rm, 0 },
+ { X86::CMOVBE16rr, X86::CMOVBE16rm, 0 },
+ { X86::CMOVBE32rr, X86::CMOVBE32rm, 0 },
+ { X86::CMOVBE64rr, X86::CMOVBE64rm, 0 },
+ { X86::CMOVE16rr, X86::CMOVE16rm, 0 },
+ { X86::CMOVE32rr, X86::CMOVE32rm, 0 },
+ { X86::CMOVE64rr, X86::CMOVE64rm, 0 },
+ { X86::CMOVG16rr, X86::CMOVG16rm, 0 },
+ { X86::CMOVG32rr, X86::CMOVG32rm, 0 },
+ { X86::CMOVG64rr, X86::CMOVG64rm, 0 },
+ { X86::CMOVGE16rr, X86::CMOVGE16rm, 0 },
+ { X86::CMOVGE32rr, X86::CMOVGE32rm, 0 },
+ { X86::CMOVGE64rr, X86::CMOVGE64rm, 0 },
+ { X86::CMOVL16rr, X86::CMOVL16rm, 0 },
+ { X86::CMOVL32rr, X86::CMOVL32rm, 0 },
+ { X86::CMOVL64rr, X86::CMOVL64rm, 0 },
+ { X86::CMOVLE16rr, X86::CMOVLE16rm, 0 },
+ { X86::CMOVLE32rr, X86::CMOVLE32rm, 0 },
+ { X86::CMOVLE64rr, X86::CMOVLE64rm, 0 },
+ { X86::CMOVNE16rr, X86::CMOVNE16rm, 0 },
+ { X86::CMOVNE32rr, X86::CMOVNE32rm, 0 },
+ { X86::CMOVNE64rr, X86::CMOVNE64rm, 0 },
+ { X86::CMOVNO16rr, X86::CMOVNO16rm, 0 },
+ { X86::CMOVNO32rr, X86::CMOVNO32rm, 0 },
+ { X86::CMOVNO64rr, X86::CMOVNO64rm, 0 },
+ { X86::CMOVNP16rr, X86::CMOVNP16rm, 0 },
+ { X86::CMOVNP32rr, X86::CMOVNP32rm, 0 },
+ { X86::CMOVNP64rr, X86::CMOVNP64rm, 0 },
+ { X86::CMOVNS16rr, X86::CMOVNS16rm, 0 },
+ { X86::CMOVNS32rr, X86::CMOVNS32rm, 0 },
+ { X86::CMOVNS64rr, X86::CMOVNS64rm, 0 },
+ { X86::CMOVO16rr, X86::CMOVO16rm, 0 },
+ { X86::CMOVO32rr, X86::CMOVO32rm, 0 },
+ { X86::CMOVO64rr, X86::CMOVO64rm, 0 },
+ { X86::CMOVP16rr, X86::CMOVP16rm, 0 },
+ { X86::CMOVP32rr, X86::CMOVP32rm, 0 },
+ { X86::CMOVP64rr, X86::CMOVP64rm, 0 },
+ { X86::CMOVS16rr, X86::CMOVS16rm, 0 },
+ { X86::CMOVS32rr, X86::CMOVS32rm, 0 },
+ { X86::CMOVS64rr, X86::CMOVS64rm, 0 },
+ { X86::CMPPDrri, X86::CMPPDrmi, TB_ALIGN_16 },
+ { X86::CMPPSrri, X86::CMPPSrmi, TB_ALIGN_16 },
+ { X86::CMPSDrr, X86::CMPSDrm, 0 },
+ { X86::CMPSSrr, X86::CMPSSrm, 0 },
+ { X86::DIVPDrr, X86::DIVPDrm, TB_ALIGN_16 },
+ { X86::DIVPSrr, X86::DIVPSrm, TB_ALIGN_16 },
+ { X86::DIVSDrr, X86::DIVSDrm, 0 },
+ { X86::DIVSSrr, X86::DIVSSrm, 0 },
+ { X86::FsANDNPDrr, X86::FsANDNPDrm, TB_ALIGN_16 },
+ { X86::FsANDNPSrr, X86::FsANDNPSrm, TB_ALIGN_16 },
+ { X86::FsANDPDrr, X86::FsANDPDrm, TB_ALIGN_16 },
+ { X86::FsANDPSrr, X86::FsANDPSrm, TB_ALIGN_16 },
+ { X86::FsORPDrr, X86::FsORPDrm, TB_ALIGN_16 },
+ { X86::FsORPSrr, X86::FsORPSrm, TB_ALIGN_16 },
+ { X86::FsXORPDrr, X86::FsXORPDrm, TB_ALIGN_16 },
+ { X86::FsXORPSrr, X86::FsXORPSrm, TB_ALIGN_16 },
+ { X86::HADDPDrr, X86::HADDPDrm, TB_ALIGN_16 },
+ { X86::HADDPSrr, X86::HADDPSrm, TB_ALIGN_16 },
+ { X86::HSUBPDrr, X86::HSUBPDrm, TB_ALIGN_16 },
+ { X86::HSUBPSrr, X86::HSUBPSrm, TB_ALIGN_16 },
+ { X86::IMUL16rr, X86::IMUL16rm, 0 },
+ { X86::IMUL32rr, X86::IMUL32rm, 0 },
+ { X86::IMUL64rr, X86::IMUL64rm, 0 },
+ { X86::Int_CMPSDrr, X86::Int_CMPSDrm, 0 },
+ { X86::Int_CMPSSrr, X86::Int_CMPSSrm, 0 },
+ { X86::MAXPDrr, X86::MAXPDrm, TB_ALIGN_16 },
+ { X86::MAXPDrr_Int, X86::MAXPDrm_Int, TB_ALIGN_16 },
+ { X86::MAXPSrr, X86::MAXPSrm, TB_ALIGN_16 },
+ { X86::MAXPSrr_Int, X86::MAXPSrm_Int, TB_ALIGN_16 },
+ { X86::MAXSDrr, X86::MAXSDrm, 0 },
+ { X86::MAXSDrr_Int, X86::MAXSDrm_Int, 0 },
+ { X86::MAXSSrr, X86::MAXSSrm, 0 },
+ { X86::MAXSSrr_Int, X86::MAXSSrm_Int, 0 },
+ { X86::MINPDrr, X86::MINPDrm, TB_ALIGN_16 },
+ { X86::MINPDrr_Int, X86::MINPDrm_Int, TB_ALIGN_16 },
+ { X86::MINPSrr, X86::MINPSrm, TB_ALIGN_16 },
+ { X86::MINPSrr_Int, X86::MINPSrm_Int, TB_ALIGN_16 },
+ { X86::MINSDrr, X86::MINSDrm, 0 },
+ { X86::MINSDrr_Int, X86::MINSDrm_Int, 0 },
+ { X86::MINSSrr, X86::MINSSrm, 0 },
+ { X86::MINSSrr_Int, X86::MINSSrm_Int, 0 },
+ { X86::MULPDrr, X86::MULPDrm, TB_ALIGN_16 },
+ { X86::MULPSrr, X86::MULPSrm, TB_ALIGN_16 },
+ { X86::MULSDrr, X86::MULSDrm, 0 },
+ { X86::MULSSrr, X86::MULSSrm, 0 },
+ { X86::OR16rr, X86::OR16rm, 0 },
+ { X86::OR32rr, X86::OR32rm, 0 },
+ { X86::OR64rr, X86::OR64rm, 0 },
+ { X86::OR8rr, X86::OR8rm, 0 },
+ { X86::ORPDrr, X86::ORPDrm, TB_ALIGN_16 },
+ { X86::ORPSrr, X86::ORPSrm, TB_ALIGN_16 },
+ { X86::PACKSSDWrr, X86::PACKSSDWrm, TB_ALIGN_16 },
+ { X86::PACKSSWBrr, X86::PACKSSWBrm, TB_ALIGN_16 },
+ { X86::PACKUSWBrr, X86::PACKUSWBrm, TB_ALIGN_16 },
+ { X86::PADDBrr, X86::PADDBrm, TB_ALIGN_16 },
+ { X86::PADDDrr, X86::PADDDrm, TB_ALIGN_16 },
+ { X86::PADDQrr, X86::PADDQrm, TB_ALIGN_16 },
+ { X86::PADDSBrr, X86::PADDSBrm, TB_ALIGN_16 },
+ { X86::PADDSWrr, X86::PADDSWrm, TB_ALIGN_16 },
+ { X86::PADDWrr, X86::PADDWrm, TB_ALIGN_16 },
+ { X86::PANDNrr, X86::PANDNrm, TB_ALIGN_16 },
+ { X86::PANDrr, X86::PANDrm, TB_ALIGN_16 },
+ { X86::PAVGBrr, X86::PAVGBrm, TB_ALIGN_16 },
+ { X86::PAVGWrr, X86::PAVGWrm, TB_ALIGN_16 },
+ { X86::PCMPEQBrr, X86::PCMPEQBrm, TB_ALIGN_16 },
+ { X86::PCMPEQDrr, X86::PCMPEQDrm, TB_ALIGN_16 },
+ { X86::PCMPEQWrr, X86::PCMPEQWrm, TB_ALIGN_16 },
+ { X86::PCMPGTBrr, X86::PCMPGTBrm, TB_ALIGN_16 },
+ { X86::PCMPGTDrr, X86::PCMPGTDrm, TB_ALIGN_16 },
+ { X86::PCMPGTWrr, X86::PCMPGTWrm, TB_ALIGN_16 },
+ { X86::PINSRWrri, X86::PINSRWrmi, TB_ALIGN_16 },
+ { X86::PMADDWDrr, X86::PMADDWDrm, TB_ALIGN_16 },
+ { X86::PMAXSWrr, X86::PMAXSWrm, TB_ALIGN_16 },
+ { X86::PMAXUBrr, X86::PMAXUBrm, TB_ALIGN_16 },
+ { X86::PMINSWrr, X86::PMINSWrm, TB_ALIGN_16 },
+ { X86::PMINUBrr, X86::PMINUBrm, TB_ALIGN_16 },
+ { X86::PMULDQrr, X86::PMULDQrm, TB_ALIGN_16 },
+ { X86::PMULHUWrr, X86::PMULHUWrm, TB_ALIGN_16 },
+ { X86::PMULHWrr, X86::PMULHWrm, TB_ALIGN_16 },
+ { X86::PMULLDrr, X86::PMULLDrm, TB_ALIGN_16 },
+ { X86::PMULLWrr, X86::PMULLWrm, TB_ALIGN_16 },
+ { X86::PMULUDQrr, X86::PMULUDQrm, TB_ALIGN_16 },
+ { X86::PORrr, X86::PORrm, TB_ALIGN_16 },
+ { X86::PSADBWrr, X86::PSADBWrm, TB_ALIGN_16 },
+ { X86::PSLLDrr, X86::PSLLDrm, TB_ALIGN_16 },
+ { X86::PSLLQrr, X86::PSLLQrm, TB_ALIGN_16 },
+ { X86::PSLLWrr, X86::PSLLWrm, TB_ALIGN_16 },
+ { X86::PSRADrr, X86::PSRADrm, TB_ALIGN_16 },
+ { X86::PSRAWrr, X86::PSRAWrm, TB_ALIGN_16 },
+ { X86::PSRLDrr, X86::PSRLDrm, TB_ALIGN_16 },
+ { X86::PSRLQrr, X86::PSRLQrm, TB_ALIGN_16 },
+ { X86::PSRLWrr, X86::PSRLWrm, TB_ALIGN_16 },
+ { X86::PSUBBrr, X86::PSUBBrm, TB_ALIGN_16 },
+ { X86::PSUBDrr, X86::PSUBDrm, TB_ALIGN_16 },
+ { X86::PSUBSBrr, X86::PSUBSBrm, TB_ALIGN_16 },
+ { X86::PSUBSWrr, X86::PSUBSWrm, TB_ALIGN_16 },
+ { X86::PSUBWrr, X86::PSUBWrm, TB_ALIGN_16 },
+ { X86::PUNPCKHBWrr, X86::PUNPCKHBWrm, TB_ALIGN_16 },
+ { X86::PUNPCKHDQrr, X86::PUNPCKHDQrm, TB_ALIGN_16 },
+ { X86::PUNPCKHQDQrr, X86::PUNPCKHQDQrm, TB_ALIGN_16 },
+ { X86::PUNPCKHWDrr, X86::PUNPCKHWDrm, TB_ALIGN_16 },
+ { X86::PUNPCKLBWrr, X86::PUNPCKLBWrm, TB_ALIGN_16 },
+ { X86::PUNPCKLDQrr, X86::PUNPCKLDQrm, TB_ALIGN_16 },
+ { X86::PUNPCKLQDQrr, X86::PUNPCKLQDQrm, TB_ALIGN_16 },
+ { X86::PUNPCKLWDrr, X86::PUNPCKLWDrm, TB_ALIGN_16 },
+ { X86::PXORrr, X86::PXORrm, TB_ALIGN_16 },
+ { X86::SBB32rr, X86::SBB32rm, 0 },
+ { X86::SBB64rr, X86::SBB64rm, 0 },
+ { X86::SHUFPDrri, X86::SHUFPDrmi, TB_ALIGN_16 },
+ { X86::SHUFPSrri, X86::SHUFPSrmi, TB_ALIGN_16 },
+ { X86::SUB16rr, X86::SUB16rm, 0 },
+ { X86::SUB32rr, X86::SUB32rm, 0 },
+ { X86::SUB64rr, X86::SUB64rm, 0 },
+ { X86::SUB8rr, X86::SUB8rm, 0 },
+ { X86::SUBPDrr, X86::SUBPDrm, TB_ALIGN_16 },
+ { X86::SUBPSrr, X86::SUBPSrm, TB_ALIGN_16 },
+ { X86::SUBSDrr, X86::SUBSDrm, 0 },
+ { X86::SUBSSrr, X86::SUBSSrm, 0 },
// FIXME: TEST*rr -> swapped operand of TEST*mr.
- { X86::UNPCKHPDrr, X86::UNPCKHPDrm, 16 },
- { X86::UNPCKHPSrr, X86::UNPCKHPSrm, 16 },
- { X86::UNPCKLPDrr, X86::UNPCKLPDrm, 16 },
- { X86::UNPCKLPSrr, X86::UNPCKLPSrm, 16 },
- { X86::XOR16rr, X86::XOR16rm, 0 },
- { X86::XOR32rr, X86::XOR32rm, 0 },
- { X86::XOR64rr, X86::XOR64rm, 0 },
- { X86::XOR8rr, X86::XOR8rm, 0 },
- { X86::XORPDrr, X86::XORPDrm, 16 },
- { X86::XORPSrr, X86::XORPSrm, 16 }
+ { X86::UNPCKHPDrr, X86::UNPCKHPDrm, TB_ALIGN_16 },
+ { X86::UNPCKHPSrr, X86::UNPCKHPSrm, TB_ALIGN_16 },
+ { X86::UNPCKLPDrr, X86::UNPCKLPDrm, TB_ALIGN_16 },
+ { X86::UNPCKLPSrr, X86::UNPCKLPSrm, TB_ALIGN_16 },
+ { X86::XOR16rr, X86::XOR16rm, 0 },
+ { X86::XOR32rr, X86::XOR32rm, 0 },
+ { X86::XOR64rr, X86::XOR64rm, 0 },
+ { X86::XOR8rr, X86::XOR8rm, 0 },
+ { X86::XORPDrr, X86::XORPDrm, TB_ALIGN_16 },
+ { X86::XORPSrr, X86::XORPSrm, TB_ALIGN_16 },
+ // AVX 128-bit versions of foldable instructions
+ { X86::VCVTSD2SSrr, X86::VCVTSD2SSrm, 0 },
+ { X86::Int_VCVTSD2SSrr, X86::Int_VCVTSD2SSrm, 0 },
+ { X86::VCVTSI2SD64rr, X86::VCVTSI2SD64rm, 0 },
+ { X86::Int_VCVTSI2SD64rr, X86::Int_VCVTSI2SD64rm, 0 },
+ { X86::VCVTSI2SDrr, X86::VCVTSI2SDrm, 0 },
+ { X86::Int_VCVTSI2SDrr, X86::Int_VCVTSI2SDrm, 0 },
+ { X86::VCVTSI2SS64rr, X86::VCVTSI2SS64rm, 0 },
+ { X86::Int_VCVTSI2SS64rr, X86::Int_VCVTSI2SS64rm, 0 },
+ { X86::VCVTSI2SSrr, X86::VCVTSI2SSrm, 0 },
+ { X86::Int_VCVTSI2SSrr, X86::Int_VCVTSI2SSrm, 0 },
+ { X86::VCVTSS2SDrr, X86::VCVTSS2SDrm, 0 },
+ { X86::Int_VCVTSS2SDrr, X86::Int_VCVTSS2SDrm, 0 },
+ { X86::VCVTTSD2SI64rr, X86::VCVTTSD2SI64rm, 0 },
+ { X86::Int_VCVTTSD2SI64rr,X86::Int_VCVTTSD2SI64rm, 0 },
+ { X86::VCVTTSD2SIrr, X86::VCVTTSD2SIrm, 0 },
+ { X86::Int_VCVTTSD2SIrr, X86::Int_VCVTTSD2SIrm, 0 },
+ { X86::VCVTTSS2SI64rr, X86::VCVTTSS2SI64rm, 0 },
+ { X86::Int_VCVTTSS2SI64rr,X86::Int_VCVTTSS2SI64rm, 0 },
+ { X86::VCVTTSS2SIrr, X86::VCVTTSS2SIrm, 0 },
+ { X86::Int_VCVTTSS2SIrr, X86::Int_VCVTTSS2SIrm, 0 },
+ { X86::VCVTSD2SI64rr, X86::VCVTSD2SI64rm, 0 },
+ { X86::VCVTSD2SIrr, X86::VCVTSD2SIrm, 0 },
+ { X86::VCVTTPD2DQrr, X86::VCVTTPD2DQrm, TB_ALIGN_16 },
+ { X86::VCVTTPS2DQrr, X86::VCVTTPS2DQrm, TB_ALIGN_16 },
+ { X86::VRSQRTSSr, X86::VRSQRTSSm, 0 },
+ { X86::VSQRTSDr, X86::VSQRTSDm, 0 },
+ { X86::VSQRTSSr, X86::VSQRTSSm, 0 },
+ { X86::VADDPDrr, X86::VADDPDrm, TB_ALIGN_16 },
+ { X86::VADDPSrr, X86::VADDPSrm, TB_ALIGN_16 },
+ { X86::VADDSDrr, X86::VADDSDrm, 0 },
+ { X86::VADDSSrr, X86::VADDSSrm, 0 },
+ { X86::VADDSUBPDrr, X86::VADDSUBPDrm, TB_ALIGN_16 },
+ { X86::VADDSUBPSrr, X86::VADDSUBPSrm, TB_ALIGN_16 },
+ { X86::VANDNPDrr, X86::VANDNPDrm, TB_ALIGN_16 },
+ { X86::VANDNPSrr, X86::VANDNPSrm, TB_ALIGN_16 },
+ { X86::VANDPDrr, X86::VANDPDrm, TB_ALIGN_16 },
+ { X86::VANDPSrr, X86::VANDPSrm, TB_ALIGN_16 },
+ { X86::VCMPPDrri, X86::VCMPPDrmi, TB_ALIGN_16 },
+ { X86::VCMPPSrri, X86::VCMPPSrmi, TB_ALIGN_16 },
+ { X86::VCMPSDrr, X86::VCMPSDrm, 0 },
+ { X86::VCMPSSrr, X86::VCMPSSrm, 0 },
+ { X86::VDIVPDrr, X86::VDIVPDrm, TB_ALIGN_16 },
+ { X86::VDIVPSrr, X86::VDIVPSrm, TB_ALIGN_16 },
+ { X86::VDIVSDrr, X86::VDIVSDrm, 0 },
+ { X86::VDIVSSrr, X86::VDIVSSrm, 0 },
+ { X86::VFsANDNPDrr, X86::VFsANDNPDrm, TB_ALIGN_16 },
+ { X86::VFsANDNPSrr, X86::VFsANDNPSrm, TB_ALIGN_16 },
+ { X86::VFsANDPDrr, X86::VFsANDPDrm, TB_ALIGN_16 },
+ { X86::VFsANDPSrr, X86::VFsANDPSrm, TB_ALIGN_16 },
+ { X86::VFsORPDrr, X86::VFsORPDrm, TB_ALIGN_16 },
+ { X86::VFsORPSrr, X86::VFsORPSrm, TB_ALIGN_16 },
+ { X86::VFsXORPDrr, X86::VFsXORPDrm, TB_ALIGN_16 },
+ { X86::VFsXORPSrr, X86::VFsXORPSrm, TB_ALIGN_16 },
+ { X86::VHADDPDrr, X86::VHADDPDrm, TB_ALIGN_16 },
+ { X86::VHADDPSrr, X86::VHADDPSrm, TB_ALIGN_16 },
+ { X86::VHSUBPDrr, X86::VHSUBPDrm, TB_ALIGN_16 },
+ { X86::VHSUBPSrr, X86::VHSUBPSrm, TB_ALIGN_16 },
+ { X86::Int_VCMPSDrr, X86::Int_VCMPSDrm, 0 },
+ { X86::Int_VCMPSSrr, X86::Int_VCMPSSrm, 0 },
+ { X86::VMAXPDrr, X86::VMAXPDrm, TB_ALIGN_16 },
+ { X86::VMAXPDrr_Int, X86::VMAXPDrm_Int, TB_ALIGN_16 },
+ { X86::VMAXPSrr, X86::VMAXPSrm, TB_ALIGN_16 },
+ { X86::VMAXPSrr_Int, X86::VMAXPSrm_Int, TB_ALIGN_16 },
+ { X86::VMAXSDrr, X86::VMAXSDrm, 0 },
+ { X86::VMAXSDrr_Int, X86::VMAXSDrm_Int, 0 },
+ { X86::VMAXSSrr, X86::VMAXSSrm, 0 },
+ { X86::VMAXSSrr_Int, X86::VMAXSSrm_Int, 0 },
+ { X86::VMINPDrr, X86::VMINPDrm, TB_ALIGN_16 },
+ { X86::VMINPDrr_Int, X86::VMINPDrm_Int, TB_ALIGN_16 },
+ { X86::VMINPSrr, X86::VMINPSrm, TB_ALIGN_16 },
+ { X86::VMINPSrr_Int, X86::VMINPSrm_Int, TB_ALIGN_16 },
+ { X86::VMINSDrr, X86::VMINSDrm, 0 },
+ { X86::VMINSDrr_Int, X86::VMINSDrm_Int, 0 },
+ { X86::VMINSSrr, X86::VMINSSrm, 0 },
+ { X86::VMINSSrr_Int, X86::VMINSSrm_Int, 0 },
+ { X86::VMULPDrr, X86::VMULPDrm, TB_ALIGN_16 },
+ { X86::VMULPSrr, X86::VMULPSrm, TB_ALIGN_16 },
+ { X86::VMULSDrr, X86::VMULSDrm, 0 },
+ { X86::VMULSSrr, X86::VMULSSrm, 0 },
+ { X86::VORPDrr, X86::VORPDrm, TB_ALIGN_16 },
+ { X86::VORPSrr, X86::VORPSrm, TB_ALIGN_16 },
+ { X86::VPACKSSDWrr, X86::VPACKSSDWrm, TB_ALIGN_16 },
+ { X86::VPACKSSWBrr, X86::VPACKSSWBrm, TB_ALIGN_16 },
+ { X86::VPACKUSWBrr, X86::VPACKUSWBrm, TB_ALIGN_16 },
+ { X86::VPADDBrr, X86::VPADDBrm, TB_ALIGN_16 },
+ { X86::VPADDDrr, X86::VPADDDrm, TB_ALIGN_16 },
+ { X86::VPADDQrr, X86::VPADDQrm, TB_ALIGN_16 },
+ { X86::VPADDSBrr, X86::VPADDSBrm, TB_ALIGN_16 },
+ { X86::VPADDSWrr, X86::VPADDSWrm, TB_ALIGN_16 },
+ { X86::VPADDWrr, X86::VPADDWrm, TB_ALIGN_16 },
+ { X86::VPANDNrr, X86::VPANDNrm, TB_ALIGN_16 },
+ { X86::VPANDrr, X86::VPANDrm, TB_ALIGN_16 },
+ { X86::VPCMPEQBrr, X86::VPCMPEQBrm, TB_ALIGN_16 },
+ { X86::VPCMPEQDrr, X86::VPCMPEQDrm, TB_ALIGN_16 },
+ { X86::VPCMPEQWrr, X86::VPCMPEQWrm, TB_ALIGN_16 },
+ { X86::VPCMPGTBrr, X86::VPCMPGTBrm, TB_ALIGN_16 },
+ { X86::VPCMPGTDrr, X86::VPCMPGTDrm, TB_ALIGN_16 },
+ { X86::VPCMPGTWrr, X86::VPCMPGTWrm, TB_ALIGN_16 },
+ { X86::VPINSRWrri, X86::VPINSRWrmi, TB_ALIGN_16 },
+ { X86::VPMADDWDrr, X86::VPMADDWDrm, TB_ALIGN_16 },
+ { X86::VPMAXSWrr, X86::VPMAXSWrm, TB_ALIGN_16 },
+ { X86::VPMAXUBrr, X86::VPMAXUBrm, TB_ALIGN_16 },
+ { X86::VPMINSWrr, X86::VPMINSWrm, TB_ALIGN_16 },
+ { X86::VPMINUBrr, X86::VPMINUBrm, TB_ALIGN_16 },
+ { X86::VPMULDQrr, X86::VPMULDQrm, TB_ALIGN_16 },
+ { X86::VPMULHUWrr, X86::VPMULHUWrm, TB_ALIGN_16 },
+ { X86::VPMULHWrr, X86::VPMULHWrm, TB_ALIGN_16 },
+ { X86::VPMULLDrr, X86::VPMULLDrm, TB_ALIGN_16 },
+ { X86::VPMULLWrr, X86::VPMULLWrm, TB_ALIGN_16 },
+ { X86::VPMULUDQrr, X86::VPMULUDQrm, TB_ALIGN_16 },
+ { X86::VPORrr, X86::VPORrm, TB_ALIGN_16 },
+ { X86::VPSADBWrr, X86::VPSADBWrm, TB_ALIGN_16 },
+ { X86::VPSLLDrr, X86::VPSLLDrm, TB_ALIGN_16 },
+ { X86::VPSLLQrr, X86::VPSLLQrm, TB_ALIGN_16 },
+ { X86::VPSLLWrr, X86::VPSLLWrm, TB_ALIGN_16 },
+ { X86::VPSRADrr, X86::VPSRADrm, TB_ALIGN_16 },
+ { X86::VPSRAWrr, X86::VPSRAWrm, TB_ALIGN_16 },
+ { X86::VPSRLDrr, X86::VPSRLDrm, TB_ALIGN_16 },
+ { X86::VPSRLQrr, X86::VPSRLQrm, TB_ALIGN_16 },
+ { X86::VPSRLWrr, X86::VPSRLWrm, TB_ALIGN_16 },
+ { X86::VPSUBBrr, X86::VPSUBBrm, TB_ALIGN_16 },
+ { X86::VPSUBDrr, X86::VPSUBDrm, TB_ALIGN_16 },
+ { X86::VPSUBSBrr, X86::VPSUBSBrm, TB_ALIGN_16 },
+ { X86::VPSUBSWrr, X86::VPSUBSWrm, TB_ALIGN_16 },
+ { X86::VPSUBWrr, X86::VPSUBWrm, TB_ALIGN_16 },
+ { X86::VPUNPCKHBWrr, X86::VPUNPCKHBWrm, TB_ALIGN_16 },
+ { X86::VPUNPCKHDQrr, X86::VPUNPCKHDQrm, TB_ALIGN_16 },
+ { X86::VPUNPCKHQDQrr, X86::VPUNPCKHQDQrm, TB_ALIGN_16 },
+ { X86::VPUNPCKHWDrr, X86::VPUNPCKHWDrm, TB_ALIGN_16 },
+ { X86::VPUNPCKLBWrr, X86::VPUNPCKLBWrm, TB_ALIGN_16 },
+ { X86::VPUNPCKLDQrr, X86::VPUNPCKLDQrm, TB_ALIGN_16 },
+ { X86::VPUNPCKLQDQrr, X86::VPUNPCKLQDQrm, TB_ALIGN_16 },
+ { X86::VPUNPCKLWDrr, X86::VPUNPCKLWDrm, TB_ALIGN_16 },
+ { X86::VPXORrr, X86::VPXORrm, TB_ALIGN_16 },
+ { X86::VSHUFPDrri, X86::VSHUFPDrmi, TB_ALIGN_16 },
+ { X86::VSHUFPSrri, X86::VSHUFPSrmi, TB_ALIGN_16 },
+ { X86::VSUBPDrr, X86::VSUBPDrm, TB_ALIGN_16 },
+ { X86::VSUBPSrr, X86::VSUBPSrm, TB_ALIGN_16 },
+ { X86::VSUBSDrr, X86::VSUBSDrm, 0 },
+ { X86::VSUBSSrr, X86::VSUBSSrm, 0 },
+ { X86::VUNPCKHPDrr, X86::VUNPCKHPDrm, TB_ALIGN_16 },
+ { X86::VUNPCKHPSrr, X86::VUNPCKHPSrm, TB_ALIGN_16 },
+ { X86::VUNPCKLPDrr, X86::VUNPCKLPDrm, TB_ALIGN_16 },
+ { X86::VUNPCKLPSrr, X86::VUNPCKLPSrm, TB_ALIGN_16 },
+ { X86::VXORPDrr, X86::VXORPDrm, TB_ALIGN_16 },
+ { X86::VXORPSrr, X86::VXORPSrm, TB_ALIGN_16 }
+ // FIXME: add AVX 256-bit foldable instructions
};
for (unsigned i = 0, e = array_lengthof(OpTbl2); i != e; ++i) {
unsigned RegOp = OpTbl2[i][0];
- unsigned MemOp = OpTbl2[i][1] & ~TB_FLAGS;
- unsigned Align = OpTbl2[i][2];
-
- assert(!RegOp2MemOpTable2.count(RegOp) && "Duplicate entry!");
- RegOp2MemOpTable2[RegOp] = std::make_pair(MemOp, Align);
-
- // If this is not a reversible operation (because there is a many->one)
- // mapping, don't insert the reverse of the operation into MemOp2RegOpTable.
- if (OpTbl2[i][1] & TB_NOT_REVERSABLE)
- continue;
+ unsigned MemOp = OpTbl2[i][1];
+ unsigned Flags = OpTbl2[i][2];
+ AddTableEntry(RegOp2MemOpTable2, MemOp2RegOpTable,
+ RegOp, MemOp,
+ // Index 2, folded load
+ Flags | TB_INDEX_2 | TB_FOLDED_LOAD);
+ }
+}
- // Index 2, folded load
- unsigned AuxInfo = 2 | (1 << 4);
- assert(!MemOp2RegOpTable.count(MemOp) &&
+void
+X86InstrInfo::AddTableEntry(RegOp2MemOpTableType &R2MTable,
+ MemOp2RegOpTableType &M2RTable,
+ unsigned RegOp, unsigned MemOp, unsigned Flags) {
+ if ((Flags & TB_NO_FORWARD) == 0) {
+ assert(!R2MTable.count(RegOp) && "Duplicate entry!");
+ R2MTable[RegOp] = std::make_pair(MemOp, Flags);
+ }
+ if ((Flags & TB_NO_REVERSE) == 0) {
+ assert(!M2RTable.count(MemOp) &&
"Duplicated entries in unfolding maps?");
- MemOp2RegOpTable[MemOp] = std::make_pair(RegOp, AuxInfo);
- }
+ M2RTable[MemOp] = std::make_pair(RegOp, Flags);
+ }
}
bool
@@ -796,6 +1000,11 @@ static bool isFrameLoadOpcode(int Opcode) {
case X86::MOVAPSrm:
case X86::MOVAPDrm:
case X86::MOVDQArm:
+ case X86::VMOVSSrm:
+ case X86::VMOVSDrm:
+ case X86::VMOVAPSrm:
+ case X86::VMOVAPDrm:
+ case X86::VMOVDQArm:
case X86::VMOVAPSYrm:
case X86::VMOVAPDYrm:
case X86::VMOVDQAYrm:
@@ -820,6 +1029,11 @@ static bool isFrameStoreOpcode(int Opcode) {
case X86::MOVAPSmr:
case X86::MOVAPDmr:
case X86::MOVDQAmr:
+ case X86::VMOVSSmr:
+ case X86::VMOVSDmr:
+ case X86::VMOVAPSmr:
+ case X86::VMOVAPDmr:
+ case X86::VMOVDQAmr:
case X86::VMOVAPSYmr:
case X86::VMOVAPDYmr:
case X86::VMOVDQAYmr:
@@ -852,24 +1066,6 @@ unsigned X86InstrInfo::isLoadFromStackSlotPostFE(const MachineInstr *MI,
return 0;
}
-bool X86InstrInfo::hasLoadFromStackSlot(const MachineInstr *MI,
- const MachineMemOperand *&MMO,
- int &FrameIndex) const {
- for (MachineInstr::mmo_iterator o = MI->memoperands_begin(),
- oe = MI->memoperands_end();
- o != oe;
- ++o) {
- if ((*o)->isLoad() && (*o)->getValue())
- if (const FixedStackPseudoSourceValue *Value =
- dyn_cast<const FixedStackPseudoSourceValue>((*o)->getValue())) {
- FrameIndex = Value->getFrameIndex();
- MMO = *o;
- return true;
- }
- }
- return false;
-}
-
unsigned X86InstrInfo::isStoreToStackSlot(const MachineInstr *MI,
int &FrameIndex) const {
if (isFrameStoreOpcode(MI->getOpcode()))
@@ -892,24 +1088,6 @@ unsigned X86InstrInfo::isStoreToStackSlotPostFE(const MachineInstr *MI,
return 0;
}
-bool X86InstrInfo::hasStoreToStackSlot(const MachineInstr *MI,
- const MachineMemOperand *&MMO,
- int &FrameIndex) const {
- for (MachineInstr::mmo_iterator o = MI->memoperands_begin(),
- oe = MI->memoperands_end();
- o != oe;
- ++o) {
- if ((*o)->isStore() && (*o)->getValue())
- if (const FixedStackPseudoSourceValue *Value =
- dyn_cast<const FixedStackPseudoSourceValue>((*o)->getValue())) {
- FrameIndex = Value->getFrameIndex();
- MMO = *o;
- return true;
- }
- }
- return false;
-}
-
/// regIsPICBase - Return true if register is PIC base (i.e.g defined by
/// X86::MOVPC32r.
static bool regIsPICBase(unsigned BaseReg, const MachineRegisterInfo &MRI) {
@@ -941,12 +1119,20 @@ X86InstrInfo::isReallyTriviallyReMaterializable(const MachineInstr *MI,
case X86::MOVUPSrm:
case X86::MOVAPDrm:
case X86::MOVDQArm:
+ case X86::VMOVSSrm:
+ case X86::VMOVSDrm:
+ case X86::VMOVAPSrm:
+ case X86::VMOVUPSrm:
+ case X86::VMOVAPDrm:
+ case X86::VMOVDQArm:
case X86::VMOVAPSYrm:
case X86::VMOVUPSYrm:
case X86::VMOVAPDYrm:
case X86::VMOVDQAYrm:
case X86::MMX_MOVD64rm:
case X86::MMX_MOVQ64rm:
+ case X86::FsVMOVAPSrm:
+ case X86::FsVMOVAPDrm:
case X86::FsMOVAPSrm:
case X86::FsMOVAPDrm: {
// Loads from constant pools are trivially rematerializable.
@@ -1009,15 +1195,11 @@ static bool isSafeToClobberEFLAGS(MachineBasicBlock &MBB,
MachineBasicBlock::iterator I) {
MachineBasicBlock::iterator E = MBB.end();
- // It's always safe to clobber EFLAGS at the end of a block.
- if (I == E)
- return true;
-
// For compile time consideration, if we are not able to determine the
// safety after visiting 4 instructions in each direction, we will assume
// it's not safe.
MachineBasicBlock::iterator Iter = I;
- for (unsigned i = 0; i < 4; ++i) {
+ for (unsigned i = 0; Iter != E && i < 4; ++i) {
bool SeenDef = false;
for (unsigned j = 0, e = Iter->getNumOperands(); j != e; ++j) {
MachineOperand &MO = Iter->getOperand(j);
@@ -1037,10 +1219,16 @@ static bool isSafeToClobberEFLAGS(MachineBasicBlock &MBB,
// Skip over DBG_VALUE.
while (Iter != E && Iter->isDebugValue())
++Iter;
+ }
- // If we make it to the end of the block, it's safe to clobber EFLAGS.
- if (Iter == E)
- return true;
+ // It is safe to clobber EFLAGS at the end of a block of no successor has it
+ // live in.
+ if (Iter == E) {
+ for (MachineBasicBlock::succ_iterator SI = MBB.succ_begin(),
+ SE = MBB.succ_end(); SI != SE; ++SI)
+ if ((*SI)->isLiveIn(X86::EFLAGS))
+ return false;
+ return true;
}
MachineBasicBlock::iterator B = MBB.begin();
@@ -1946,7 +2134,8 @@ static bool isHReg(unsigned Reg) {
}
// Try and copy between VR128/VR64 and GR64 registers.
-static unsigned CopyToFromAsymmetricReg(unsigned DestReg, unsigned SrcReg) {
+static unsigned CopyToFromAsymmetricReg(unsigned DestReg, unsigned SrcReg,
+ bool HasAVX) {
// SrcReg(VR128) -> DestReg(GR64)
// SrcReg(VR64) -> DestReg(GR64)
// SrcReg(GR64) -> DestReg(VR128)
@@ -1955,7 +2144,7 @@ static unsigned CopyToFromAsymmetricReg(unsigned DestReg, unsigned SrcReg) {
if (X86::GR64RegClass.contains(DestReg)) {
if (X86::VR128RegClass.contains(SrcReg)) {
// Copy from a VR128 register to a GR64 register.
- return X86::MOVPQIto64rr;
+ return HasAVX ? X86::VMOVPQIto64rr : X86::MOVPQIto64rr;
} else if (X86::VR64RegClass.contains(SrcReg)) {
// Copy from a VR64 register to a GR64 register.
return X86::MOVSDto64rr;
@@ -1963,12 +2152,23 @@ static unsigned CopyToFromAsymmetricReg(unsigned DestReg, unsigned SrcReg) {
} else if (X86::GR64RegClass.contains(SrcReg)) {
// Copy from a GR64 register to a VR128 register.
if (X86::VR128RegClass.contains(DestReg))
- return X86::MOV64toPQIrr;
+ return HasAVX ? X86::VMOV64toPQIrr : X86::MOV64toPQIrr;
// Copy from a GR64 register to a VR64 register.
else if (X86::VR64RegClass.contains(DestReg))
return X86::MOV64toSDrr;
}
+ // SrcReg(FR32) -> DestReg(GR32)
+ // SrcReg(GR32) -> DestReg(FR32)
+
+ if (X86::GR32RegClass.contains(DestReg) && X86::FR32RegClass.contains(SrcReg))
+ // Copy from a FR32 register to a GR32 register.
+ return HasAVX ? X86::VMOVSS2DIrr : X86::MOVSS2DIrr;
+
+ if (X86::FR32RegClass.contains(DestReg) && X86::GR32RegClass.contains(SrcReg))
+ // Copy from a GR32 register to a FR32 register.
+ return HasAVX ? X86::VMOVDI2SSrr : X86::MOVDI2SSrr;
+
return 0;
}
@@ -1977,6 +2177,7 @@ void X86InstrInfo::copyPhysReg(MachineBasicBlock &MBB,
unsigned DestReg, unsigned SrcReg,
bool KillSrc) const {
// First deal with the normal symmetric copies.
+ bool HasAVX = TM.getSubtarget<X86Subtarget>().hasAVX();
unsigned Opc = 0;
if (X86::GR64RegClass.contains(DestReg, SrcReg))
Opc = X86::MOV64rr;
@@ -1988,18 +2189,21 @@ void X86InstrInfo::copyPhysReg(MachineBasicBlock &MBB,
// Copying to or from a physical H register on x86-64 requires a NOREX
// move. Otherwise use a normal move.
if ((isHReg(DestReg) || isHReg(SrcReg)) &&
- TM.getSubtarget<X86Subtarget>().is64Bit())
+ TM.getSubtarget<X86Subtarget>().is64Bit()) {
Opc = X86::MOV8rr_NOREX;
- else
+ // Both operands must be encodable without an REX prefix.
+ assert(X86::GR8_NOREXRegClass.contains(SrcReg, DestReg) &&
+ "8-bit H register can not be copied outside GR8_NOREX");
+ } else
Opc = X86::MOV8rr;
} else if (X86::VR128RegClass.contains(DestReg, SrcReg))
- Opc = X86::MOVAPSrr;
+ Opc = HasAVX ? X86::VMOVAPSrr : X86::MOVAPSrr;
else if (X86::VR256RegClass.contains(DestReg, SrcReg))
Opc = X86::VMOVAPSYrr;
else if (X86::VR64RegClass.contains(DestReg, SrcReg))
Opc = X86::MMX_MOVQ64rr;
else
- Opc = CopyToFromAsymmetricReg(DestReg, SrcReg);
+ Opc = CopyToFromAsymmetricReg(DestReg, SrcReg, HasAVX);
if (Opc) {
BuildMI(MBB, MI, DL, get(Opc), DestReg)
@@ -2043,6 +2247,7 @@ static unsigned getLoadStoreRegOpcode(unsigned Reg,
bool isStackAligned,
const TargetMachine &TM,
bool load) {
+ bool HasAVX = TM.getSubtarget<X86Subtarget>().hasAVX();
switch (RC->getSize()) {
default:
llvm_unreachable("Unknown spill size");
@@ -2061,7 +2266,9 @@ static unsigned getLoadStoreRegOpcode(unsigned Reg,
if (X86::GR32RegClass.hasSubClassEq(RC))
return load ? X86::MOV32rm : X86::MOV32mr;
if (X86::FR32RegClass.hasSubClassEq(RC))
- return load ? X86::MOVSSrm : X86::MOVSSmr;
+ return load ?
+ (HasAVX ? X86::VMOVSSrm : X86::MOVSSrm) :
+ (HasAVX ? X86::VMOVSSmr : X86::MOVSSmr);
if (X86::RFP32RegClass.hasSubClassEq(RC))
return load ? X86::LD_Fp32m : X86::ST_Fp32m;
llvm_unreachable("Unknown 4-byte regclass");
@@ -2069,7 +2276,9 @@ static unsigned getLoadStoreRegOpcode(unsigned Reg,
if (X86::GR64RegClass.hasSubClassEq(RC))
return load ? X86::MOV64rm : X86::MOV64mr;
if (X86::FR64RegClass.hasSubClassEq(RC))
- return load ? X86::MOVSDrm : X86::MOVSDmr;
+ return load ?
+ (HasAVX ? X86::VMOVSDrm : X86::MOVSDrm) :
+ (HasAVX ? X86::VMOVSDmr : X86::MOVSDmr);
if (X86::VR64RegClass.hasSubClassEq(RC))
return load ? X86::MMX_MOVQ64rm : X86::MMX_MOVQ64mr;
if (X86::RFP64RegClass.hasSubClassEq(RC))
@@ -2078,13 +2287,18 @@ static unsigned getLoadStoreRegOpcode(unsigned Reg,
case 10:
assert(X86::RFP80RegClass.hasSubClassEq(RC) && "Unknown 10-byte regclass");
return load ? X86::LD_Fp80m : X86::ST_FpP80m;
- case 16:
+ case 16: {
assert(X86::VR128RegClass.hasSubClassEq(RC) && "Unknown 16-byte regclass");
// If stack is realigned we can use aligned stores.
if (isStackAligned)
- return load ? X86::MOVAPSrm : X86::MOVAPSmr;
+ return load ?
+ (HasAVX ? X86::VMOVAPSrm : X86::MOVAPSrm) :
+ (HasAVX ? X86::VMOVAPSmr : X86::MOVAPSmr);
else
- return load ? X86::MOVUPSrm : X86::MOVUPSmr;
+ return load ?
+ (HasAVX ? X86::VMOVUPSrm : X86::MOVUPSrm) :
+ (HasAVX ? X86::VMOVUPSmr : X86::MOVUPSmr);
+ }
case 32:
assert(X86::VR256RegClass.hasSubClassEq(RC) && "Unknown 32-byte regclass");
// If stack is realigned we can use aligned stores.
@@ -2118,7 +2332,8 @@ void X86InstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB,
const MachineFunction &MF = *MBB.getParent();
assert(MF.getFrameInfo()->getObjectSize(FrameIdx) >= RC->getSize() &&
"Stack slot too small for store");
- bool isAligned = (TM.getFrameLowering()->getStackAlignment() >= 16) ||
+ unsigned Alignment = RC->getSize() == 32 ? 32 : 16;
+ bool isAligned = (TM.getFrameLowering()->getStackAlignment() >= Alignment) ||
RI.canRealignStack(MF);
unsigned Opc = getStoreRegOpcode(SrcReg, RC, isAligned, TM);
DebugLoc DL = MBB.findDebugLoc(MI);
@@ -2133,7 +2348,9 @@ void X86InstrInfo::storeRegToAddr(MachineFunction &MF, unsigned SrcReg,
MachineInstr::mmo_iterator MMOBegin,
MachineInstr::mmo_iterator MMOEnd,
SmallVectorImpl<MachineInstr*> &NewMIs) const {
- bool isAligned = MMOBegin != MMOEnd && (*MMOBegin)->getAlignment() >= 16;
+ unsigned Alignment = RC->getSize() == 32 ? 32 : 16;
+ bool isAligned = MMOBegin != MMOEnd &&
+ (*MMOBegin)->getAlignment() >= Alignment;
unsigned Opc = getStoreRegOpcode(SrcReg, RC, isAligned, TM);
DebugLoc DL;
MachineInstrBuilder MIB = BuildMI(MF, DL, get(Opc));
@@ -2151,7 +2368,8 @@ void X86InstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB,
const TargetRegisterClass *RC,
const TargetRegisterInfo *TRI) const {
const MachineFunction &MF = *MBB.getParent();
- bool isAligned = (TM.getFrameLowering()->getStackAlignment() >= 16) ||
+ unsigned Alignment = RC->getSize() == 32 ? 32 : 16;
+ bool isAligned = (TM.getFrameLowering()->getStackAlignment() >= Alignment) ||
RI.canRealignStack(MF);
unsigned Opc = getLoadRegOpcode(DestReg, RC, isAligned, TM);
DebugLoc DL = MBB.findDebugLoc(MI);
@@ -2164,7 +2382,9 @@ void X86InstrInfo::loadRegFromAddr(MachineFunction &MF, unsigned DestReg,
MachineInstr::mmo_iterator MMOBegin,
MachineInstr::mmo_iterator MMOEnd,
SmallVectorImpl<MachineInstr*> &NewMIs) const {
- bool isAligned = MMOBegin != MMOEnd && (*MMOBegin)->getAlignment() >= 16;
+ unsigned Alignment = RC->getSize() == 32 ? 32 : 16;
+ bool isAligned = MMOBegin != MMOEnd &&
+ (*MMOBegin)->getAlignment() >= Alignment;
unsigned Opc = getLoadRegOpcode(DestReg, RC, isAligned, TM);
DebugLoc DL;
MachineInstrBuilder MIB = BuildMI(MF, DL, get(Opc), DestReg);
@@ -2174,6 +2394,40 @@ void X86InstrInfo::loadRegFromAddr(MachineFunction &MF, unsigned DestReg,
NewMIs.push_back(MIB);
}
+/// Expand2AddrUndef - Expand a single-def pseudo instruction to a two-addr
+/// instruction with two undef reads of the register being defined. This is
+/// used for mapping:
+/// %xmm4 = V_SET0
+/// to:
+/// %xmm4 = PXORrr %xmm4<undef>, %xmm4<undef>
+///
+static bool Expand2AddrUndef(MachineInstr *MI, const MCInstrDesc &Desc) {
+ assert(Desc.getNumOperands() == 3 && "Expected two-addr instruction.");
+ unsigned Reg = MI->getOperand(0).getReg();
+ MI->setDesc(Desc);
+
+ // MachineInstr::addOperand() will insert explicit operands before any
+ // implicit operands.
+ MachineInstrBuilder(MI).addReg(Reg, RegState::Undef)
+ .addReg(Reg, RegState::Undef);
+ // But we don't trust that.
+ assert(MI->getOperand(1).getReg() == Reg &&
+ MI->getOperand(2).getReg() == Reg && "Misplaced operand");
+ return true;
+}
+
+bool X86InstrInfo::expandPostRAPseudo(MachineBasicBlock::iterator MI) const {
+ bool HasAVX = TM.getSubtarget<X86Subtarget>().hasAVX();
+ switch (MI->getOpcode()) {
+ case X86::V_SET0:
+ return Expand2AddrUndef(MI, get(HasAVX ? X86::VPXORrr : X86::PXORrr));
+ case X86::TEST8ri_NOREX:
+ MI->setDesc(get(X86::TEST8ri));
+ return true;
+ }
+ return false;
+}
+
MachineInstr*
X86InstrInfo::emitFrameIndexDebugValue(MachineFunction &MF,
int FrameIx, uint64_t Offset,
@@ -2305,7 +2559,7 @@ X86InstrInfo::foldMemoryOperandImpl(MachineFunction &MF,
OpcodeTablePtr->find(MI->getOpcode());
if (I != OpcodeTablePtr->end()) {
unsigned Opcode = I->second.first;
- unsigned MinAlign = I->second.second;
+ unsigned MinAlign = (I->second.second & TB_ALIGN_MASK) >> TB_ALIGN_SHIFT;
if (Align < MinAlign)
return NULL;
bool NarrowToMOV32rm = false;
@@ -2352,6 +2606,51 @@ X86InstrInfo::foldMemoryOperandImpl(MachineFunction &MF,
return NULL;
}
+/// hasPartialRegUpdate - Return true for all instructions that only update
+/// the first 32 or 64-bits of the destination register and leave the rest
+/// unmodified. This can be used to avoid folding loads if the instructions
+/// only update part of the destination register, and the non-updated part is
+/// not needed. e.g. cvtss2sd, sqrtss. Unfolding the load from these
+/// instructions breaks the partial register dependency and it can improve
+/// performance. e.g.:
+///
+/// movss (%rdi), %xmm0
+/// cvtss2sd %xmm0, %xmm0
+///
+/// Instead of
+/// cvtss2sd (%rdi), %xmm0
+///
+/// FIXME: This should be turned into a TSFlags.
+///
+static bool hasPartialRegUpdate(unsigned Opcode) {
+ switch (Opcode) {
+ case X86::CVTSD2SSrr:
+ case X86::Int_CVTSD2SSrr:
+ case X86::CVTSS2SDrr:
+ case X86::Int_CVTSS2SDrr:
+ case X86::RCPSSr:
+ case X86::RCPSSr_Int:
+ case X86::ROUNDSDr:
+ case X86::ROUNDSSr:
+ case X86::RSQRTSSr:
+ case X86::RSQRTSSr_Int:
+ case X86::SQRTSSr:
+ case X86::SQRTSSr_Int:
+ // AVX encoded versions
+ case X86::VCVTSD2SSrr:
+ case X86::Int_VCVTSD2SSrr:
+ case X86::VCVTSS2SDrr:
+ case X86::Int_VCVTSS2SDrr:
+ case X86::VRCPSSr:
+ case X86::VROUNDSDr:
+ case X86::VROUNDSSr:
+ case X86::VRSQRTSSr:
+ case X86::VSQRTSSr:
+ return true;
+ }
+
+ return false;
+}
MachineInstr* X86InstrInfo::foldMemoryOperandImpl(MachineFunction &MF,
MachineInstr *MI,
@@ -2360,22 +2659,11 @@ MachineInstr* X86InstrInfo::foldMemoryOperandImpl(MachineFunction &MF,
// Check switch flag
if (NoFusing) return NULL;
- if (!MF.getFunction()->hasFnAttr(Attribute::OptimizeForSize))
- switch (MI->getOpcode()) {
- case X86::CVTSD2SSrr:
- case X86::Int_CVTSD2SSrr:
- case X86::CVTSS2SDrr:
- case X86::Int_CVTSS2SDrr:
- case X86::RCPSSr:
- case X86::RCPSSr_Int:
- case X86::ROUNDSDr:
- case X86::ROUNDSSr:
- case X86::RSQRTSSr:
- case X86::RSQRTSSr_Int:
- case X86::SQRTSSr:
- case X86::SQRTSSr_Int:
- return 0;
- }
+ // Unless optimizing for size, don't fold to avoid partial
+ // register update stalls
+ if (!MF.getFunction()->hasFnAttr(Attribute::OptimizeForSize) &&
+ hasPartialRegUpdate(MI->getOpcode()))
+ return 0;
const MachineFrameInfo *MFI = MF.getFrameInfo();
unsigned Size = MFI->getObjectSize(FrameIndex);
@@ -2412,22 +2700,11 @@ MachineInstr* X86InstrInfo::foldMemoryOperandImpl(MachineFunction &MF,
// Check switch flag
if (NoFusing) return NULL;
- if (!MF.getFunction()->hasFnAttr(Attribute::OptimizeForSize))
- switch (MI->getOpcode()) {
- case X86::CVTSD2SSrr:
- case X86::Int_CVTSD2SSrr:
- case X86::CVTSS2SDrr:
- case X86::Int_CVTSS2SDrr:
- case X86::RCPSSr:
- case X86::RCPSSr_Int:
- case X86::ROUNDSDr:
- case X86::ROUNDSSr:
- case X86::RSQRTSSr:
- case X86::RSQRTSSr_Int:
- case X86::SQRTSSr:
- case X86::SQRTSSr_Int:
- return 0;
- }
+ // Unless optimizing for size, don't fold to avoid partial
+ // register update stalls
+ if (!MF.getFunction()->hasFnAttr(Attribute::OptimizeForSize) &&
+ hasPartialRegUpdate(MI->getOpcode()))
+ return 0;
// Determine the alignment of the load.
unsigned Alignment = 0;
@@ -2439,13 +2716,9 @@ MachineInstr* X86InstrInfo::foldMemoryOperandImpl(MachineFunction &MF,
case X86::AVX_SET0PDY:
Alignment = 32;
break;
- case X86::V_SET0PS:
- case X86::V_SET0PD:
- case X86::V_SET0PI:
+ case X86::V_SET0:
case X86::V_SETALLONES:
- case X86::AVX_SET0PS:
- case X86::AVX_SET0PD:
- case X86::AVX_SET0PI:
+ case X86::AVX_SETALLONES:
Alignment = 16;
break;
case X86::FsFLD0SD:
@@ -2481,18 +2754,16 @@ MachineInstr* X86InstrInfo::foldMemoryOperandImpl(MachineFunction &MF,
SmallVector<MachineOperand,X86::AddrNumOperands> MOs;
switch (LoadMI->getOpcode()) {
- case X86::V_SET0PS:
- case X86::V_SET0PD:
- case X86::V_SET0PI:
+ case X86::V_SET0:
case X86::V_SETALLONES:
- case X86::AVX_SET0PS:
- case X86::AVX_SET0PD:
- case X86::AVX_SET0PI:
case X86::AVX_SET0PSY:
case X86::AVX_SET0PDY:
+ case X86::AVX_SETALLONES:
case X86::FsFLD0SD:
- case X86::FsFLD0SS: {
- // Folding a V_SET0P? or V_SETALLONES as a load, to ease register pressure.
+ case X86::FsFLD0SS:
+ case X86::VFsFLD0SD:
+ case X86::VFsFLD0SS: {
+ // Folding a V_SET0 or V_SETALLONES as a load, to ease register pressure.
// Create a constant-pool entry and operands to load from it.
// Medium and large mode can't fold loads this way.
@@ -2515,7 +2786,7 @@ MachineInstr* X86InstrInfo::foldMemoryOperandImpl(MachineFunction &MF,
// Create a constant-pool entry.
MachineConstantPool &MCP = *MF.getConstantPool();
- const Type *Ty;
+ Type *Ty;
unsigned Opc = LoadMI->getOpcode();
if (Opc == X86::FsFLD0SS || Opc == X86::VFsFLD0SS)
Ty = Type::getFloatTy(MF.getFunction()->getContext());
@@ -2525,9 +2796,10 @@ MachineInstr* X86InstrInfo::foldMemoryOperandImpl(MachineFunction &MF,
Ty = VectorType::get(Type::getFloatTy(MF.getFunction()->getContext()), 8);
else
Ty = VectorType::get(Type::getInt32Ty(MF.getFunction()->getContext()), 4);
- const Constant *C = LoadMI->getOpcode() == X86::V_SETALLONES ?
- Constant::getAllOnesValue(Ty) :
- Constant::getNullValue(Ty);
+
+ bool IsAllOnes = (Opc == X86::V_SETALLONES || Opc == X86::AVX_SETALLONES);
+ const Constant *C = IsAllOnes ? Constant::getAllOnesValue(Ty) :
+ Constant::getNullValue(Ty);
unsigned CPI = MCP.getConstantPoolIndex(C, Alignment);
// Create operands to load from the constant pool entry.
@@ -2615,9 +2887,9 @@ bool X86InstrInfo::unfoldMemoryOperand(MachineFunction &MF, MachineInstr *MI,
if (I == MemOp2RegOpTable.end())
return false;
unsigned Opc = I->second.first;
- unsigned Index = I->second.second & 0xf;
- bool FoldedLoad = I->second.second & (1 << 4);
- bool FoldedStore = I->second.second & (1 << 5);
+ unsigned Index = I->second.second & TB_INDEX_MASK;
+ bool FoldedLoad = I->second.second & TB_FOLDED_LOAD;
+ bool FoldedStore = I->second.second & TB_FOLDED_STORE;
if (UnfoldLoad && !FoldedLoad)
return false;
UnfoldLoad &= FoldedLoad;
@@ -2743,9 +3015,9 @@ X86InstrInfo::unfoldMemoryOperand(SelectionDAG &DAG, SDNode *N,
if (I == MemOp2RegOpTable.end())
return false;
unsigned Opc = I->second.first;
- unsigned Index = I->second.second & 0xf;
- bool FoldedLoad = I->second.second & (1 << 4);
- bool FoldedStore = I->second.second & (1 << 5);
+ unsigned Index = I->second.second & TB_INDEX_MASK;
+ bool FoldedLoad = I->second.second & TB_FOLDED_LOAD;
+ bool FoldedStore = I->second.second & TB_FOLDED_STORE;
const MCInstrDesc &MCID = get(Opc);
const TargetRegisterClass *RC = getRegClass(MCID, Index, &RI);
unsigned NumDefs = MCID.NumDefs;
@@ -2780,7 +3052,9 @@ X86InstrInfo::unfoldMemoryOperand(SelectionDAG &DAG, SDNode *N,
!TM.getSubtarget<X86Subtarget>().isUnalignedMemAccessFast())
// Do not introduce a slow unaligned load.
return false;
- bool isAligned = (*MMOs.first) && (*MMOs.first)->getAlignment() >= 16;
+ unsigned Alignment = RC->getSize() == 32 ? 32 : 16;
+ bool isAligned = (*MMOs.first) &&
+ (*MMOs.first)->getAlignment() >= Alignment;
Load = DAG.getMachineNode(getLoadRegOpcode(0, RC, isAligned, TM), dl,
VT, MVT::Other, &AddrOps[0], AddrOps.size());
NewNodes.push_back(Load);
@@ -2822,7 +3096,9 @@ X86InstrInfo::unfoldMemoryOperand(SelectionDAG &DAG, SDNode *N,
!TM.getSubtarget<X86Subtarget>().isUnalignedMemAccessFast())
// Do not introduce a slow unaligned store.
return false;
- bool isAligned = (*MMOs.first) && (*MMOs.first)->getAlignment() >= 16;
+ unsigned Alignment = RC->getSize() == 32 ? 32 : 16;
+ bool isAligned = (*MMOs.first) &&
+ (*MMOs.first)->getAlignment() >= Alignment;
SDNode *Store = DAG.getMachineNode(getStoreRegOpcode(0, DstRC,
isAligned, TM),
dl, MVT::Other,
@@ -2843,14 +3119,14 @@ unsigned X86InstrInfo::getOpcodeAfterMemoryUnfold(unsigned Opc,
MemOp2RegOpTable.find(Opc);
if (I == MemOp2RegOpTable.end())
return 0;
- bool FoldedLoad = I->second.second & (1 << 4);
- bool FoldedStore = I->second.second & (1 << 5);
+ bool FoldedLoad = I->second.second & TB_FOLDED_LOAD;
+ bool FoldedStore = I->second.second & TB_FOLDED_STORE;
if (UnfoldLoad && !FoldedLoad)
return 0;
if (UnfoldStore && !FoldedStore)
return 0;
if (LoadRegIndex)
- *LoadRegIndex = I->second.second & 0xf;
+ *LoadRegIndex = I->second.second & TB_INDEX_MASK;
return I->second.first;
}
@@ -2881,6 +3157,16 @@ X86InstrInfo::areLoadsFromSameBasePtr(SDNode *Load1, SDNode *Load2,
case X86::MOVAPDrm:
case X86::MOVDQArm:
case X86::MOVDQUrm:
+ // AVX load instructions
+ case X86::VMOVSSrm:
+ case X86::VMOVSDrm:
+ case X86::FsVMOVAPSrm:
+ case X86::FsVMOVAPDrm:
+ case X86::VMOVAPSrm:
+ case X86::VMOVUPSrm:
+ case X86::VMOVAPDrm:
+ case X86::VMOVDQArm:
+ case X86::VMOVDQUrm:
case X86::VMOVAPSYrm:
case X86::VMOVUPSYrm:
case X86::VMOVAPDYrm:
@@ -2908,6 +3194,16 @@ X86InstrInfo::areLoadsFromSameBasePtr(SDNode *Load1, SDNode *Load2,
case X86::MOVAPDrm:
case X86::MOVDQArm:
case X86::MOVDQUrm:
+ // AVX load instructions
+ case X86::VMOVSSrm:
+ case X86::VMOVSDrm:
+ case X86::FsVMOVAPSrm:
+ case X86::FsVMOVAPDrm:
+ case X86::VMOVAPSrm:
+ case X86::VMOVUPSrm:
+ case X86::VMOVAPDrm:
+ case X86::VMOVDQArm:
+ case X86::VMOVDQUrm:
case X86::VMOVAPSYrm:
case X86::VMOVUPSYrm:
case X86::VMOVAPDYrm:
@@ -3007,31 +3303,6 @@ isSafeToMoveRegClassDefs(const TargetRegisterClass *RC) const {
RC == &X86::RFP64RegClass || RC == &X86::RFP80RegClass);
}
-
-/// isX86_64ExtendedReg - Is the MachineOperand a x86-64 extended (r8 or higher)
-/// register? e.g. r8, xmm8, xmm13, etc.
-bool X86InstrInfo::isX86_64ExtendedReg(unsigned RegNo) {
- switch (RegNo) {
- default: break;
- case X86::R8: case X86::R9: case X86::R10: case X86::R11:
- case X86::R12: case X86::R13: case X86::R14: case X86::R15:
- case X86::R8D: case X86::R9D: case X86::R10D: case X86::R11D:
- case X86::R12D: case X86::R13D: case X86::R14D: case X86::R15D:
- case X86::R8W: case X86::R9W: case X86::R10W: case X86::R11W:
- case X86::R12W: case X86::R13W: case X86::R14W: case X86::R15W:
- case X86::R8B: case X86::R9B: case X86::R10B: case X86::R11B:
- case X86::R12B: case X86::R13B: case X86::R14B: case X86::R15B:
- case X86::XMM8: case X86::XMM9: case X86::XMM10: case X86::XMM11:
- case X86::XMM12: case X86::XMM13: case X86::XMM14: case X86::XMM15:
- case X86::YMM8: case X86::YMM9: case X86::YMM10: case X86::YMM11:
- case X86::YMM12: case X86::YMM13: case X86::YMM14: case X86::YMM15:
- case X86::CR8: case X86::CR9: case X86::CR10: case X86::CR11:
- case X86::CR12: case X86::CR13: case X86::CR14: case X86::CR15:
- return true;
- }
- return false;
-}
-
/// getGlobalBaseReg - Return a virtual register initialized with the
/// the global base register value. Output instructions required to
/// initialize the register in the function entry block, if necessary.
@@ -3072,7 +3343,6 @@ static const unsigned ReplaceableInstrs[][3] = {
{ X86::ANDPSrr, X86::ANDPDrr, X86::PANDrr },
{ X86::ORPSrm, X86::ORPDrm, X86::PORrm },
{ X86::ORPSrr, X86::ORPDrr, X86::PORrr },
- { X86::V_SET0PS, X86::V_SET0PD, X86::V_SET0PI },
{ X86::XORPSrm, X86::XORPDrm, X86::PXORrm },
{ X86::XORPSrr, X86::XORPDrr, X86::PXORrr },
// AVX 128-bit support
@@ -3088,7 +3358,6 @@ static const unsigned ReplaceableInstrs[][3] = {
{ X86::VANDPSrr, X86::VANDPDrr, X86::VPANDrr },
{ X86::VORPSrm, X86::VORPDrm, X86::VPORrm },
{ X86::VORPSrr, X86::VORPDrr, X86::VPORrr },
- { X86::AVX_SET0PS, X86::AVX_SET0PD, X86::AVX_SET0PI },
{ X86::VXORPSrm, X86::VXORPDrm, X86::VPXORrm },
{ X86::VXORPSrr, X86::VXORPDrr, X86::VPXORrr },
// AVX 256-bit support
@@ -3111,13 +3380,13 @@ static const unsigned *lookup(unsigned opcode, unsigned domain) {
}
std::pair<uint16_t, uint16_t>
-X86InstrInfo::GetSSEDomain(const MachineInstr *MI) const {
+X86InstrInfo::getExecutionDomain(const MachineInstr *MI) const {
uint16_t domain = (MI->getDesc().TSFlags >> X86II::SSEDomainShift) & 3;
return std::make_pair(domain,
domain && lookup(MI->getOpcode(), domain) ? 0xe : 0);
}
-void X86InstrInfo::SetSSEDomain(MachineInstr *MI, unsigned Domain) const {
+void X86InstrInfo::setExecutionDomain(MachineInstr *MI, unsigned Domain) const {
assert(Domain>0 && Domain<4 && "Invalid execution domain");
uint16_t dom = (MI->getDesc().TSFlags >> X86II::SSEDomainShift) & 3;
assert(dom && "Not an SSE instruction");
@@ -3158,6 +3427,29 @@ bool X86InstrInfo::isHighLatencyDef(int opc) const {
case X86::SQRTSSm_Int:
case X86::SQRTSSr:
case X86::SQRTSSr_Int:
+ // AVX instructions with high latency
+ case X86::VDIVSDrm:
+ case X86::VDIVSDrm_Int:
+ case X86::VDIVSDrr:
+ case X86::VDIVSDrr_Int:
+ case X86::VDIVSSrm:
+ case X86::VDIVSSrm_Int:
+ case X86::VDIVSSrr:
+ case X86::VDIVSSrr_Int:
+ case X86::VSQRTPDm:
+ case X86::VSQRTPDm_Int:
+ case X86::VSQRTPDr:
+ case X86::VSQRTPDr_Int:
+ case X86::VSQRTPSm:
+ case X86::VSQRTPSm_Int:
+ case X86::VSQRTPSr:
+ case X86::VSQRTPSr_Int:
+ case X86::VSQRTSDm:
+ case X86::VSQRTSDm_Int:
+ case X86::VSQRTSDr:
+ case X86::VSQRTSSm:
+ case X86::VSQRTSSm_Int:
+ case X86::VSQRTSSr:
return true;
}
}
diff --git a/contrib/llvm/lib/Target/X86/X86InstrInfo.h b/contrib/llvm/lib/Target/X86/X86InstrInfo.h
index 5f2eba3..97009db 100644
--- a/contrib/llvm/lib/Target/X86/X86InstrInfo.h
+++ b/contrib/llvm/lib/Target/X86/X86InstrInfo.h
@@ -27,24 +27,6 @@ namespace llvm {
class X86TargetMachine;
namespace X86 {
- // Enums for memory operand decoding. Each memory operand is represented with
- // a 5 operand sequence in the form:
- // [BaseReg, ScaleAmt, IndexReg, Disp, Segment]
- // These enums help decode this.
- enum {
- AddrBaseReg = 0,
- AddrScaleAmt = 1,
- AddrIndexReg = 2,
- AddrDisp = 3,
-
- /// AddrSegmentReg - The operand # of the segment in the memory operand.
- AddrSegmentReg = 4,
-
- /// AddrNumOperands - Total number of operands in a memory reference.
- AddrNumOperands = 5
- };
-
-
// X86 specific condition code. These correspond to X86_*_COND in
// X86InstrInfo.td. They must be kept in synch.
enum CondCode {
@@ -82,133 +64,8 @@ namespace X86 {
/// GetOppositeBranchCondition - Return the inverse of the specified cond,
/// e.g. turning COND_E to COND_NE.
CondCode GetOppositeBranchCondition(X86::CondCode CC);
+} // end namespace X86;
-}
-
-/// X86II - This namespace holds all of the target specific flags that
-/// instruction info tracks.
-///
-namespace X86II {
- /// Target Operand Flag enum.
- enum TOF {
- //===------------------------------------------------------------------===//
- // X86 Specific MachineOperand flags.
-
- MO_NO_FLAG,
-
- /// MO_GOT_ABSOLUTE_ADDRESS - On a symbol operand, this represents a
- /// relocation of:
- /// SYMBOL_LABEL + [. - PICBASELABEL]
- MO_GOT_ABSOLUTE_ADDRESS,
-
- /// MO_PIC_BASE_OFFSET - On a symbol operand this indicates that the
- /// immediate should get the value of the symbol minus the PIC base label:
- /// SYMBOL_LABEL - PICBASELABEL
- MO_PIC_BASE_OFFSET,
-
- /// MO_GOT - On a symbol operand this indicates that the immediate is the
- /// offset to the GOT entry for the symbol name from the base of the GOT.
- ///
- /// See the X86-64 ELF ABI supplement for more details.
- /// SYMBOL_LABEL @GOT
- MO_GOT,
-
- /// MO_GOTOFF - On a symbol operand this indicates that the immediate is
- /// the offset to the location of the symbol name from the base of the GOT.
- ///
- /// See the X86-64 ELF ABI supplement for more details.
- /// SYMBOL_LABEL @GOTOFF
- MO_GOTOFF,
-
- /// MO_GOTPCREL - On a symbol operand this indicates that the immediate is
- /// offset to the GOT entry for the symbol name from the current code
- /// location.
- ///
- /// See the X86-64 ELF ABI supplement for more details.
- /// SYMBOL_LABEL @GOTPCREL
- MO_GOTPCREL,
-
- /// MO_PLT - On a symbol operand this indicates that the immediate is
- /// offset to the PLT entry of symbol name from the current code location.
- ///
- /// See the X86-64 ELF ABI supplement for more details.
- /// SYMBOL_LABEL @PLT
- MO_PLT,
-
- /// MO_TLSGD - On a symbol operand this indicates that the immediate is
- /// some TLS offset.
- ///
- /// See 'ELF Handling for Thread-Local Storage' for more details.
- /// SYMBOL_LABEL @TLSGD
- MO_TLSGD,
-
- /// MO_GOTTPOFF - On a symbol operand this indicates that the immediate is
- /// some TLS offset.
- ///
- /// See 'ELF Handling for Thread-Local Storage' for more details.
- /// SYMBOL_LABEL @GOTTPOFF
- MO_GOTTPOFF,
-
- /// MO_INDNTPOFF - On a symbol operand this indicates that the immediate is
- /// some TLS offset.
- ///
- /// See 'ELF Handling for Thread-Local Storage' for more details.
- /// SYMBOL_LABEL @INDNTPOFF
- MO_INDNTPOFF,
-
- /// MO_TPOFF - On a symbol operand this indicates that the immediate is
- /// some TLS offset.
- ///
- /// See 'ELF Handling for Thread-Local Storage' for more details.
- /// SYMBOL_LABEL @TPOFF
- MO_TPOFF,
-
- /// MO_NTPOFF - On a symbol operand this indicates that the immediate is
- /// some TLS offset.
- ///
- /// See 'ELF Handling for Thread-Local Storage' for more details.
- /// SYMBOL_LABEL @NTPOFF
- MO_NTPOFF,
-
- /// MO_DLLIMPORT - On a symbol operand "FOO", this indicates that the
- /// reference is actually to the "__imp_FOO" symbol. This is used for
- /// dllimport linkage on windows.
- MO_DLLIMPORT,
-
- /// MO_DARWIN_STUB - On a symbol operand "FOO", this indicates that the
- /// reference is actually to the "FOO$stub" symbol. This is used for calls
- /// and jumps to external functions on Tiger and earlier.
- MO_DARWIN_STUB,
-
- /// MO_DARWIN_NONLAZY - On a symbol operand "FOO", this indicates that the
- /// reference is actually to the "FOO$non_lazy_ptr" symbol, which is a
- /// non-PIC-base-relative reference to a non-hidden dyld lazy pointer stub.
- MO_DARWIN_NONLAZY,
-
- /// MO_DARWIN_NONLAZY_PIC_BASE - On a symbol operand "FOO", this indicates
- /// that the reference is actually to "FOO$non_lazy_ptr - PICBASE", which is
- /// a PIC-base-relative reference to a non-hidden dyld lazy pointer stub.
- MO_DARWIN_NONLAZY_PIC_BASE,
-
- /// MO_DARWIN_HIDDEN_NONLAZY_PIC_BASE - On a symbol operand "FOO", this
- /// indicates that the reference is actually to "FOO$non_lazy_ptr -PICBASE",
- /// which is a PIC-base-relative reference to a hidden dyld lazy pointer
- /// stub.
- MO_DARWIN_HIDDEN_NONLAZY_PIC_BASE,
-
- /// MO_TLVP - On a symbol operand this indicates that the immediate is
- /// some TLS offset.
- ///
- /// This is the TLS offset for the Darwin TLS mechanism.
- MO_TLVP,
-
- /// MO_TLVP_PIC_BASE - On a symbol operand this indicates that the immediate
- /// is some TLS offset from the picbase.
- ///
- /// This is the 32-bit TLS offset for Darwin TLS in PIC mode.
- MO_TLVP_PIC_BASE
- };
-}
/// isGlobalStubReference - Return true if the specified TargetFlag operand is
/// a reference to a stub for a global, not the global itself.
@@ -243,353 +100,6 @@ inline static bool isGlobalRelativeToPICBase(unsigned char TargetFlag) {
}
}
-/// X86II - This namespace holds all of the target specific flags that
-/// instruction info tracks.
-///
-namespace X86II {
- enum {
- //===------------------------------------------------------------------===//
- // Instruction encodings. These are the standard/most common forms for X86
- // instructions.
- //
-
- // PseudoFrm - This represents an instruction that is a pseudo instruction
- // or one that has not been implemented yet. It is illegal to code generate
- // it, but tolerated for intermediate implementation stages.
- Pseudo = 0,
-
- /// Raw - This form is for instructions that don't have any operands, so
- /// they are just a fixed opcode value, like 'leave'.
- RawFrm = 1,
-
- /// AddRegFrm - This form is used for instructions like 'push r32' that have
- /// their one register operand added to their opcode.
- AddRegFrm = 2,
-
- /// MRMDestReg - This form is used for instructions that use the Mod/RM byte
- /// to specify a destination, which in this case is a register.
- ///
- MRMDestReg = 3,
-
- /// MRMDestMem - This form is used for instructions that use the Mod/RM byte
- /// to specify a destination, which in this case is memory.
- ///
- MRMDestMem = 4,
-
- /// MRMSrcReg - This form is used for instructions that use the Mod/RM byte
- /// to specify a source, which in this case is a register.
- ///
- MRMSrcReg = 5,
-
- /// MRMSrcMem - This form is used for instructions that use the Mod/RM byte
- /// to specify a source, which in this case is memory.
- ///
- MRMSrcMem = 6,
-
- /// MRM[0-7][rm] - These forms are used to represent instructions that use
- /// a Mod/RM byte, and use the middle field to hold extended opcode
- /// information. In the intel manual these are represented as /0, /1, ...
- ///
-
- // First, instructions that operate on a register r/m operand...
- MRM0r = 16, MRM1r = 17, MRM2r = 18, MRM3r = 19, // Format /0 /1 /2 /3
- MRM4r = 20, MRM5r = 21, MRM6r = 22, MRM7r = 23, // Format /4 /5 /6 /7
-
- // Next, instructions that operate on a memory r/m operand...
- MRM0m = 24, MRM1m = 25, MRM2m = 26, MRM3m = 27, // Format /0 /1 /2 /3
- MRM4m = 28, MRM5m = 29, MRM6m = 30, MRM7m = 31, // Format /4 /5 /6 /7
-
- // MRMInitReg - This form is used for instructions whose source and
- // destinations are the same register.
- MRMInitReg = 32,
-
- //// MRM_C1 - A mod/rm byte of exactly 0xC1.
- MRM_C1 = 33,
- MRM_C2 = 34,
- MRM_C3 = 35,
- MRM_C4 = 36,
- MRM_C8 = 37,
- MRM_C9 = 38,
- MRM_E8 = 39,
- MRM_F0 = 40,
- MRM_F8 = 41,
- MRM_F9 = 42,
- MRM_D0 = 45,
- MRM_D1 = 46,
-
- /// RawFrmImm8 - This is used for the ENTER instruction, which has two
- /// immediates, the first of which is a 16-bit immediate (specified by
- /// the imm encoding) and the second is a 8-bit fixed value.
- RawFrmImm8 = 43,
-
- /// RawFrmImm16 - This is used for CALL FAR instructions, which have two
- /// immediates, the first of which is a 16 or 32-bit immediate (specified by
- /// the imm encoding) and the second is a 16-bit fixed value. In the AMD
- /// manual, this operand is described as pntr16:32 and pntr16:16
- RawFrmImm16 = 44,
-
- FormMask = 63,
-
- //===------------------------------------------------------------------===//
- // Actual flags...
-
- // OpSize - Set if this instruction requires an operand size prefix (0x66),
- // which most often indicates that the instruction operates on 16 bit data
- // instead of 32 bit data.
- OpSize = 1 << 6,
-
- // AsSize - Set if this instruction requires an operand size prefix (0x67),
- // which most often indicates that the instruction address 16 bit address
- // instead of 32 bit address (or 32 bit address in 64 bit mode).
- AdSize = 1 << 7,
-
- //===------------------------------------------------------------------===//
- // Op0Mask - There are several prefix bytes that are used to form two byte
- // opcodes. These are currently 0x0F, 0xF3, and 0xD8-0xDF. This mask is
- // used to obtain the setting of this field. If no bits in this field is
- // set, there is no prefix byte for obtaining a multibyte opcode.
- //
- Op0Shift = 8,
- Op0Mask = 0x1F << Op0Shift,
-
- // TB - TwoByte - Set if this instruction has a two byte opcode, which
- // starts with a 0x0F byte before the real opcode.
- TB = 1 << Op0Shift,
-
- // REP - The 0xF3 prefix byte indicating repetition of the following
- // instruction.
- REP = 2 << Op0Shift,
-
- // D8-DF - These escape opcodes are used by the floating point unit. These
- // values must remain sequential.
- D8 = 3 << Op0Shift, D9 = 4 << Op0Shift,
- DA = 5 << Op0Shift, DB = 6 << Op0Shift,
- DC = 7 << Op0Shift, DD = 8 << Op0Shift,
- DE = 9 << Op0Shift, DF = 10 << Op0Shift,
-
- // XS, XD - These prefix codes are for single and double precision scalar
- // floating point operations performed in the SSE registers.
- XD = 11 << Op0Shift, XS = 12 << Op0Shift,
-
- // T8, TA, A6, A7 - Prefix after the 0x0F prefix.
- T8 = 13 << Op0Shift, TA = 14 << Op0Shift,
- A6 = 15 << Op0Shift, A7 = 16 << Op0Shift,
-
- // TF - Prefix before and after 0x0F
- TF = 17 << Op0Shift,
-
- //===------------------------------------------------------------------===//
- // REX_W - REX prefixes are instruction prefixes used in 64-bit mode.
- // They are used to specify GPRs and SSE registers, 64-bit operand size,
- // etc. We only cares about REX.W and REX.R bits and only the former is
- // statically determined.
- //
- REXShift = Op0Shift + 5,
- REX_W = 1 << REXShift,
-
- //===------------------------------------------------------------------===//
- // This three-bit field describes the size of an immediate operand. Zero is
- // unused so that we can tell if we forgot to set a value.
- ImmShift = REXShift + 1,
- ImmMask = 7 << ImmShift,
- Imm8 = 1 << ImmShift,
- Imm8PCRel = 2 << ImmShift,
- Imm16 = 3 << ImmShift,
- Imm16PCRel = 4 << ImmShift,
- Imm32 = 5 << ImmShift,
- Imm32PCRel = 6 << ImmShift,
- Imm64 = 7 << ImmShift,
-
- //===------------------------------------------------------------------===//
- // FP Instruction Classification... Zero is non-fp instruction.
-
- // FPTypeMask - Mask for all of the FP types...
- FPTypeShift = ImmShift + 3,
- FPTypeMask = 7 << FPTypeShift,
-
- // NotFP - The default, set for instructions that do not use FP registers.
- NotFP = 0 << FPTypeShift,
-
- // ZeroArgFP - 0 arg FP instruction which implicitly pushes ST(0), f.e. fld0
- ZeroArgFP = 1 << FPTypeShift,
-
- // OneArgFP - 1 arg FP instructions which implicitly read ST(0), such as fst
- OneArgFP = 2 << FPTypeShift,
-
- // OneArgFPRW - 1 arg FP instruction which implicitly read ST(0) and write a
- // result back to ST(0). For example, fcos, fsqrt, etc.
- //
- OneArgFPRW = 3 << FPTypeShift,
-
- // TwoArgFP - 2 arg FP instructions which implicitly read ST(0), and an
- // explicit argument, storing the result to either ST(0) or the implicit
- // argument. For example: fadd, fsub, fmul, etc...
- TwoArgFP = 4 << FPTypeShift,
-
- // CompareFP - 2 arg FP instructions which implicitly read ST(0) and an
- // explicit argument, but have no destination. Example: fucom, fucomi, ...
- CompareFP = 5 << FPTypeShift,
-
- // CondMovFP - "2 operand" floating point conditional move instructions.
- CondMovFP = 6 << FPTypeShift,
-
- // SpecialFP - Special instruction forms. Dispatch by opcode explicitly.
- SpecialFP = 7 << FPTypeShift,
-
- // Lock prefix
- LOCKShift = FPTypeShift + 3,
- LOCK = 1 << LOCKShift,
-
- // Segment override prefixes. Currently we just need ability to address
- // stuff in gs and fs segments.
- SegOvrShift = LOCKShift + 1,
- SegOvrMask = 3 << SegOvrShift,
- FS = 1 << SegOvrShift,
- GS = 2 << SegOvrShift,
-
- // Execution domain for SSE instructions in bits 23, 24.
- // 0 in bits 23-24 means normal, non-SSE instruction.
- SSEDomainShift = SegOvrShift + 2,
-
- OpcodeShift = SSEDomainShift + 2,
-
- //===------------------------------------------------------------------===//
- /// VEX - The opcode prefix used by AVX instructions
- VEXShift = OpcodeShift + 8,
- VEX = 1U << 0,
-
- /// VEX_W - Has a opcode specific functionality, but is used in the same
- /// way as REX_W is for regular SSE instructions.
- VEX_W = 1U << 1,
-
- /// VEX_4V - Used to specify an additional AVX/SSE register. Several 2
- /// address instructions in SSE are represented as 3 address ones in AVX
- /// and the additional register is encoded in VEX_VVVV prefix.
- VEX_4V = 1U << 2,
-
- /// VEX_I8IMM - Specifies that the last register used in a AVX instruction,
- /// must be encoded in the i8 immediate field. This usually happens in
- /// instructions with 4 operands.
- VEX_I8IMM = 1U << 3,
-
- /// VEX_L - Stands for a bit in the VEX opcode prefix meaning the current
- /// instruction uses 256-bit wide registers. This is usually auto detected
- /// if a VR256 register is used, but some AVX instructions also have this
- /// field marked when using a f256 memory references.
- VEX_L = 1U << 4,
-
- /// Has3DNow0F0FOpcode - This flag indicates that the instruction uses the
- /// wacky 0x0F 0x0F prefix for 3DNow! instructions. The manual documents
- /// this as having a 0x0F prefix with a 0x0F opcode, and each instruction
- /// storing a classifier in the imm8 field. To simplify our implementation,
- /// we handle this by storeing the classifier in the opcode field and using
- /// this flag to indicate that the encoder should do the wacky 3DNow! thing.
- Has3DNow0F0FOpcode = 1U << 5
- };
-
- // getBaseOpcodeFor - This function returns the "base" X86 opcode for the
- // specified machine instruction.
- //
- static inline unsigned char getBaseOpcodeFor(uint64_t TSFlags) {
- return TSFlags >> X86II::OpcodeShift;
- }
-
- static inline bool hasImm(uint64_t TSFlags) {
- return (TSFlags & X86II::ImmMask) != 0;
- }
-
- /// getSizeOfImm - Decode the "size of immediate" field from the TSFlags field
- /// of the specified instruction.
- static inline unsigned getSizeOfImm(uint64_t TSFlags) {
- switch (TSFlags & X86II::ImmMask) {
- default: assert(0 && "Unknown immediate size");
- case X86II::Imm8:
- case X86II::Imm8PCRel: return 1;
- case X86II::Imm16:
- case X86II::Imm16PCRel: return 2;
- case X86II::Imm32:
- case X86II::Imm32PCRel: return 4;
- case X86II::Imm64: return 8;
- }
- }
-
- /// isImmPCRel - Return true if the immediate of the specified instruction's
- /// TSFlags indicates that it is pc relative.
- static inline unsigned isImmPCRel(uint64_t TSFlags) {
- switch (TSFlags & X86II::ImmMask) {
- default: assert(0 && "Unknown immediate size");
- case X86II::Imm8PCRel:
- case X86II::Imm16PCRel:
- case X86II::Imm32PCRel:
- return true;
- case X86II::Imm8:
- case X86II::Imm16:
- case X86II::Imm32:
- case X86II::Imm64:
- return false;
- }
- }
-
- /// getMemoryOperandNo - The function returns the MCInst operand # for the
- /// first field of the memory operand. If the instruction doesn't have a
- /// memory operand, this returns -1.
- ///
- /// Note that this ignores tied operands. If there is a tied register which
- /// is duplicated in the MCInst (e.g. "EAX = addl EAX, [mem]") it is only
- /// counted as one operand.
- ///
- static inline int getMemoryOperandNo(uint64_t TSFlags) {
- switch (TSFlags & X86II::FormMask) {
- case X86II::MRMInitReg: assert(0 && "FIXME: Remove this form");
- default: assert(0 && "Unknown FormMask value in getMemoryOperandNo!");
- case X86II::Pseudo:
- case X86II::RawFrm:
- case X86II::AddRegFrm:
- case X86II::MRMDestReg:
- case X86II::MRMSrcReg:
- case X86II::RawFrmImm8:
- case X86II::RawFrmImm16:
- return -1;
- case X86II::MRMDestMem:
- return 0;
- case X86II::MRMSrcMem: {
- bool HasVEX_4V = (TSFlags >> X86II::VEXShift) & X86II::VEX_4V;
- unsigned FirstMemOp = 1;
- if (HasVEX_4V)
- ++FirstMemOp;// Skip the register source (which is encoded in VEX_VVVV).
-
- // FIXME: Maybe lea should have its own form? This is a horrible hack.
- //if (Opcode == X86::LEA64r || Opcode == X86::LEA64_32r ||
- // Opcode == X86::LEA16r || Opcode == X86::LEA32r)
- return FirstMemOp;
- }
- case X86II::MRM0r: case X86II::MRM1r:
- case X86II::MRM2r: case X86II::MRM3r:
- case X86II::MRM4r: case X86II::MRM5r:
- case X86II::MRM6r: case X86II::MRM7r:
- return -1;
- case X86II::MRM0m: case X86II::MRM1m:
- case X86II::MRM2m: case X86II::MRM3m:
- case X86II::MRM4m: case X86II::MRM5m:
- case X86II::MRM6m: case X86II::MRM7m:
- return 0;
- case X86II::MRM_C1:
- case X86II::MRM_C2:
- case X86II::MRM_C3:
- case X86II::MRM_C4:
- case X86II::MRM_C8:
- case X86II::MRM_C9:
- case X86II::MRM_E8:
- case X86II::MRM_F0:
- case X86II::MRM_F8:
- case X86II::MRM_F9:
- case X86II::MRM_D0:
- case X86II::MRM_D1:
- return -1;
- }
- }
-}
-
inline static bool isScale(const MachineOperand &MO) {
return MO.isImm() &&
(MO.getImm() == 1 || MO.getImm() == 2 ||
@@ -621,14 +131,22 @@ class X86InstrInfo : public X86GenInstrInfo {
/// RegOp2MemOpTable2Addr, RegOp2MemOpTable0, RegOp2MemOpTable1,
/// RegOp2MemOpTable2 - Load / store folding opcode maps.
///
- DenseMap<unsigned, std::pair<unsigned,unsigned> > RegOp2MemOpTable2Addr;
- DenseMap<unsigned, std::pair<unsigned,unsigned> > RegOp2MemOpTable0;
- DenseMap<unsigned, std::pair<unsigned,unsigned> > RegOp2MemOpTable1;
- DenseMap<unsigned, std::pair<unsigned,unsigned> > RegOp2MemOpTable2;
+ typedef DenseMap<unsigned,
+ std::pair<unsigned, unsigned> > RegOp2MemOpTableType;
+ RegOp2MemOpTableType RegOp2MemOpTable2Addr;
+ RegOp2MemOpTableType RegOp2MemOpTable0;
+ RegOp2MemOpTableType RegOp2MemOpTable1;
+ RegOp2MemOpTableType RegOp2MemOpTable2;
/// MemOp2RegOpTable - Load / store unfolding opcode map.
///
- DenseMap<unsigned, std::pair<unsigned, unsigned> > MemOp2RegOpTable;
+ typedef DenseMap<unsigned,
+ std::pair<unsigned, unsigned> > MemOp2RegOpTableType;
+ MemOp2RegOpTableType MemOp2RegOpTable;
+
+ void AddTableEntry(RegOp2MemOpTableType &R2MTable,
+ MemOp2RegOpTableType &M2RTable,
+ unsigned RegOp, unsigned MemOp, unsigned Flags);
public:
explicit X86InstrInfo(X86TargetMachine &tm);
@@ -656,17 +174,6 @@ public:
unsigned isLoadFromStackSlotPostFE(const MachineInstr *MI,
int &FrameIndex) const;
- /// hasLoadFromStackSlot - If the specified machine instruction has
- /// a load from a stack slot, return true along with the FrameIndex
- /// of the loaded stack slot and the machine mem operand containing
- /// the reference. If not, return false. Unlike
- /// isLoadFromStackSlot, this returns true for any instructions that
- /// loads from the stack. This is a hint only and may not catch all
- /// cases.
- bool hasLoadFromStackSlot(const MachineInstr *MI,
- const MachineMemOperand *&MMO,
- int &FrameIndex) const;
-
unsigned isStoreToStackSlot(const MachineInstr *MI, int &FrameIndex) const;
/// isStoreToStackSlotPostFE - Check for post-frame ptr elimination
/// stack locations as well. This uses a heuristic so it isn't
@@ -674,16 +181,6 @@ public:
unsigned isStoreToStackSlotPostFE(const MachineInstr *MI,
int &FrameIndex) const;
- /// hasStoreToStackSlot - If the specified machine instruction has a
- /// store to a stack slot, return true along with the FrameIndex of
- /// the loaded stack slot and the machine mem operand containing the
- /// reference. If not, return false. Unlike isStoreToStackSlot,
- /// this returns true for any instructions that loads from the
- /// stack. This is a hint only and may not catch all cases.
- bool hasStoreToStackSlot(const MachineInstr *MI,
- const MachineMemOperand *&MMO,
- int &FrameIndex) const;
-
bool isReallyTriviallyReMaterializable(const MachineInstr *MI,
AliasAnalysis *AA) const;
void reMaterialize(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI,
@@ -750,6 +247,9 @@ public:
MachineInstr::mmo_iterator MMOBegin,
MachineInstr::mmo_iterator MMOEnd,
SmallVectorImpl<MachineInstr*> &NewMIs) const;
+
+ virtual bool expandPostRAPseudo(MachineBasicBlock::iterator MI) const;
+
virtual
MachineInstr *emitFrameIndexDebugValue(MachineFunction &MF,
int FrameIx, uint64_t Offset,
@@ -829,32 +329,21 @@ public:
/// instruction that defines the specified register class.
bool isSafeToMoveRegClassDefs(const TargetRegisterClass *RC) const;
- static bool isX86_64NonExtLowByteReg(unsigned reg) {
- return (reg == X86::SPL || reg == X86::BPL ||
- reg == X86::SIL || reg == X86::DIL);
- }
-
static bool isX86_64ExtendedReg(const MachineOperand &MO) {
if (!MO.isReg()) return false;
- return isX86_64ExtendedReg(MO.getReg());
+ return X86II::isX86_64ExtendedReg(MO.getReg());
}
- /// isX86_64ExtendedReg - Is the MachineOperand a x86-64 extended (r8 or
- /// higher) register? e.g. r8, xmm8, xmm13, etc.
- static bool isX86_64ExtendedReg(unsigned RegNo);
-
/// getGlobalBaseReg - Return a virtual register initialized with the
/// the global base register value. Output instructions required to
/// initialize the register in the function entry block, if necessary.
///
unsigned getGlobalBaseReg(MachineFunction *MF) const;
- /// GetSSEDomain - Return the SSE execution domain of MI as the first element,
- /// and a bitmask of possible arguments to SetSSEDomain ase the second.
- std::pair<uint16_t, uint16_t> GetSSEDomain(const MachineInstr *MI) const;
+ std::pair<uint16_t, uint16_t>
+ getExecutionDomain(const MachineInstr *MI) const;
- /// SetSSEDomain - Set the SSEDomain of MI.
- void SetSSEDomain(MachineInstr *MI, unsigned Domain) const;
+ void setExecutionDomain(MachineInstr *MI, unsigned Domain) const;
MachineInstr* foldMemoryOperandImpl(MachineFunction &MF,
MachineInstr* MI,
diff --git a/contrib/llvm/lib/Target/X86/X86InstrInfo.td b/contrib/llvm/lib/Target/X86/X86InstrInfo.td
index 7eb07b0..d54bf27 100644
--- a/contrib/llvm/lib/Target/X86/X86InstrInfo.td
+++ b/contrib/llvm/lib/Target/X86/X86InstrInfo.td
@@ -65,7 +65,7 @@ def SDTX86SetCC_C : SDTypeProfile<1, 2,
def SDTX86cas : SDTypeProfile<0, 3, [SDTCisPtrTy<0>, SDTCisInt<1>,
SDTCisVT<2, i8>]>;
-def SDTX86cas8 : SDTypeProfile<0, 1, [SDTCisPtrTy<0>]>;
+def SDTX86caspair : SDTypeProfile<0, 1, [SDTCisPtrTy<0>]>;
def SDTX86atomicBinary : SDTypeProfile<2, 3, [SDTCisInt<0>, SDTCisInt<1>,
SDTCisPtrTy<2>, SDTCisInt<3>,SDTCisInt<4>]>;
@@ -97,6 +97,8 @@ def SDT_X86TLSADDR : SDTypeProfile<0, 1, [SDTCisInt<0>]>;
def SDT_X86TLSCALL : SDTypeProfile<0, 1, [SDTCisInt<0>]>;
+def SDT_X86SEG_ALLOCA : SDTypeProfile<1, 1, [SDTCisVT<0, iPTR>, SDTCisVT<1, iPTR>]>;
+
def SDT_X86EHRET : SDTypeProfile<0, 1, [SDTCisInt<0>]>;
def SDT_X86TCRET : SDTypeProfile<0, 2, [SDTCisPtrTy<0>, SDTCisVT<1, i32>]>;
@@ -133,9 +135,13 @@ def X86setcc_c : SDNode<"X86ISD::SETCC_CARRY", SDTX86SetCC_C>;
def X86cas : SDNode<"X86ISD::LCMPXCHG_DAG", SDTX86cas,
[SDNPHasChain, SDNPInGlue, SDNPOutGlue, SDNPMayStore,
SDNPMayLoad, SDNPMemOperand]>;
-def X86cas8 : SDNode<"X86ISD::LCMPXCHG8_DAG", SDTX86cas8,
+def X86cas8 : SDNode<"X86ISD::LCMPXCHG8_DAG", SDTX86caspair,
+ [SDNPHasChain, SDNPInGlue, SDNPOutGlue, SDNPMayStore,
+ SDNPMayLoad, SDNPMemOperand]>;
+def X86cas16 : SDNode<"X86ISD::LCMPXCHG16_DAG", SDTX86caspair,
[SDNPHasChain, SDNPInGlue, SDNPOutGlue, SDNPMayStore,
SDNPMayLoad, SDNPMemOperand]>;
+
def X86AtomAdd64 : SDNode<"X86ISD::ATOMADD64_DAG", SDTX86atomicBinary,
[SDNPHasChain, SDNPMayStore,
SDNPMayLoad, SDNPMemOperand]>;
@@ -218,12 +224,16 @@ def X86xor_flag : SDNode<"X86ISD::XOR", SDTBinaryArithWithFlags,
[SDNPCommutative]>;
def X86and_flag : SDNode<"X86ISD::AND", SDTBinaryArithWithFlags,
[SDNPCommutative]>;
+def X86andn_flag : SDNode<"X86ISD::ANDN", SDTBinaryArithWithFlags>;
def X86mul_imm : SDNode<"X86ISD::MUL_IMM", SDTIntBinOp>;
def X86WinAlloca : SDNode<"X86ISD::WIN_ALLOCA", SDTX86Void,
[SDNPHasChain, SDNPInGlue, SDNPOutGlue]>;
+def X86SegAlloca : SDNode<"X86ISD::SEG_ALLOCA", SDT_X86SEG_ALLOCA,
+ [SDNPHasChain]>;
+
def X86TLSCall : SDNode<"X86ISD::TLSCALL", SDT_X86TLSCALL,
[SDNPHasChain, SDNPOptInGlue, SDNPOutGlue]>;
@@ -331,6 +341,11 @@ class ImmSExtAsmOperandClass : AsmOperandClass {
let RenderMethod = "addImmOperands";
}
+class ImmZExtAsmOperandClass : AsmOperandClass {
+ let SuperClasses = [ImmAsmOperand];
+ let RenderMethod = "addImmOperands";
+}
+
// Sign-extended immediate classes. We don't need to define the full lattice
// here because there is no instruction with an ambiguity between ImmSExti64i32
// and ImmSExti32i8.
@@ -358,6 +373,12 @@ def ImmSExti32i8AsmOperand : ImmSExtAsmOperandClass {
let Name = "ImmSExti32i8";
}
+// [0, 0x000000FF]
+def ImmZExtu32u8AsmOperand : ImmZExtAsmOperandClass {
+ let Name = "ImmZExtu32u8";
+}
+
+
// [0, 0x0000007F] |
// [0xFFFFFFFFFFFFFF80, 0xFFFFFFFFFFFFFFFF]
def ImmSExti64i8AsmOperand : ImmSExtAsmOperandClass {
@@ -377,6 +398,11 @@ def i32i8imm : Operand<i32> {
let ParserMatchClass = ImmSExti32i8AsmOperand;
let OperandType = "OPERAND_IMMEDIATE";
}
+// 32-bits but only 8 bits are significant, and those 8 bits are unsigned.
+def u32u8imm : Operand<i32> {
+ let ParserMatchClass = ImmZExtu32u8AsmOperand;
+ let OperandType = "OPERAND_IMMEDIATE";
+}
// 64-bits but only 32 bits are significant.
def i64i32imm : Operand<i64> {
@@ -389,11 +415,13 @@ def i64i32imm : Operand<i64> {
def i64i32imm_pcrel : Operand<i64> {
let PrintMethod = "print_pcrel_imm";
let ParserMatchClass = X86AbsMemAsmOperand;
+ let OperandType = "OPERAND_PCREL";
}
// 64-bits but only 8 bits are significant.
def i64i8imm : Operand<i64> {
let ParserMatchClass = ImmSExti64i8AsmOperand;
+ let OperandType = "OPERAND_IMMEDIATE";
}
def lea64_32mem : Operand<i32> {
@@ -442,18 +470,33 @@ def HasSSE4A : Predicate<"Subtarget->hasSSE4A()">;
def HasAVX : Predicate<"Subtarget->hasAVX()">;
def HasXMMInt : Predicate<"Subtarget->hasXMMInt()">;
+def HasPOPCNT : Predicate<"Subtarget->hasPOPCNT()">;
def HasAES : Predicate<"Subtarget->hasAES()">;
def HasCLMUL : Predicate<"Subtarget->hasCLMUL()">;
def HasFMA3 : Predicate<"Subtarget->hasFMA3()">;
def HasFMA4 : Predicate<"Subtarget->hasFMA4()">;
+def HasMOVBE : Predicate<"Subtarget->hasMOVBE()">;
+def HasRDRAND : Predicate<"Subtarget->hasRDRAND()">;
+def HasF16C : Predicate<"Subtarget->hasF16C()">;
+def HasLZCNT : Predicate<"Subtarget->hasLZCNT()">;
+def HasBMI : Predicate<"Subtarget->hasBMI()">;
def FPStackf32 : Predicate<"!Subtarget->hasXMM()">;
def FPStackf64 : Predicate<"!Subtarget->hasXMMInt()">;
+def HasCmpxchg16b: Predicate<"Subtarget->hasCmpxchg16b()">;
def In32BitMode : Predicate<"!Subtarget->is64Bit()">,
AssemblerPredicate<"!Mode64Bit">;
def In64BitMode : Predicate<"Subtarget->is64Bit()">,
AssemblerPredicate<"Mode64Bit">;
def IsWin64 : Predicate<"Subtarget->isTargetWin64()">;
def NotWin64 : Predicate<"!Subtarget->isTargetWin64()">;
+def IsNaCl : Predicate<"Subtarget->isTargetNaCl()">,
+ AssemblerPredicate<"ModeNaCl">;
+def IsNaCl32 : Predicate<"Subtarget->isTargetNaCl32()">,
+ AssemblerPredicate<"ModeNaCl,!Mode64Bit">;
+def IsNaCl64 : Predicate<"Subtarget->isTargetNaCl64()">,
+ AssemblerPredicate<"ModeNaCl,Mode64Bit">;
+def NotNaCl : Predicate<"!Subtarget->isTargetNaCl()">,
+ AssemblerPredicate<"!ModeNaCl">;
def SmallCode : Predicate<"TM.getCodeModel() == CodeModel::Small">;
def KernelCode : Predicate<"TM.getCodeModel() == CodeModel::Kernel">;
def FarData : Predicate<"TM.getCodeModel() != CodeModel::Small &&"
@@ -766,30 +809,30 @@ def BSR64rm : RI<0xBD, MRMSrcMem, (outs GR64:$dst), (ins i64mem:$src),
// These uses the DF flag in the EFLAGS register to inc or dec EDI and ESI
let Defs = [EDI,ESI], Uses = [EDI,ESI,EFLAGS] in {
-def MOVSB : I<0xA4, RawFrm, (outs), (ins), "{movsb}", []>;
-def MOVSW : I<0xA5, RawFrm, (outs), (ins), "{movsw}", []>, OpSize;
-def MOVSD : I<0xA5, RawFrm, (outs), (ins), "{movsl|movsd}", []>;
+def MOVSB : I<0xA4, RawFrm, (outs), (ins), "movsb", []>;
+def MOVSW : I<0xA5, RawFrm, (outs), (ins), "movsw", []>, OpSize;
+def MOVSD : I<0xA5, RawFrm, (outs), (ins), "movs{l|d}", []>;
def MOVSQ : RI<0xA5, RawFrm, (outs), (ins), "movsq", []>;
}
// These uses the DF flag in the EFLAGS register to inc or dec EDI and ESI
let Defs = [EDI], Uses = [AL,EDI,EFLAGS] in
-def STOSB : I<0xAA, RawFrm, (outs), (ins), "{stosb}", []>;
+def STOSB : I<0xAA, RawFrm, (outs), (ins), "stosb", []>;
let Defs = [EDI], Uses = [AX,EDI,EFLAGS] in
-def STOSW : I<0xAB, RawFrm, (outs), (ins), "{stosw}", []>, OpSize;
+def STOSW : I<0xAB, RawFrm, (outs), (ins), "stosw", []>, OpSize;
let Defs = [EDI], Uses = [EAX,EDI,EFLAGS] in
-def STOSD : I<0xAB, RawFrm, (outs), (ins), "{stosl|stosd}", []>;
+def STOSD : I<0xAB, RawFrm, (outs), (ins), "stos{l|d}", []>;
let Defs = [RCX,RDI], Uses = [RAX,RCX,RDI,EFLAGS] in
def STOSQ : RI<0xAB, RawFrm, (outs), (ins), "stosq", []>;
-def SCAS8 : I<0xAE, RawFrm, (outs), (ins), "scas{b}", []>;
-def SCAS16 : I<0xAF, RawFrm, (outs), (ins), "scas{w}", []>, OpSize;
-def SCAS32 : I<0xAF, RawFrm, (outs), (ins), "scas{l}", []>;
+def SCAS8 : I<0xAE, RawFrm, (outs), (ins), "scasb", []>;
+def SCAS16 : I<0xAF, RawFrm, (outs), (ins), "scasw", []>, OpSize;
+def SCAS32 : I<0xAF, RawFrm, (outs), (ins), "scas{l|d}", []>;
def SCAS64 : RI<0xAF, RawFrm, (outs), (ins), "scasq", []>;
-def CMPS8 : I<0xA6, RawFrm, (outs), (ins), "cmps{b}", []>;
-def CMPS16 : I<0xA7, RawFrm, (outs), (ins), "cmps{w}", []>, OpSize;
-def CMPS32 : I<0xA7, RawFrm, (outs), (ins), "cmps{l}", []>;
+def CMPS8 : I<0xA6, RawFrm, (outs), (ins), "cmpsb", []>;
+def CMPS16 : I<0xA7, RawFrm, (outs), (ins), "cmpsw", []>, OpSize;
+def CMPS32 : I<0xA7, RawFrm, (outs), (ins), "cmps{l|d}", []>;
def CMPS64 : RI<0xA7, RawFrm, (outs), (ins), "cmpsq", []>;
@@ -841,22 +884,22 @@ def MOV64mi32 : RIi32<0xC7, MRM0m, (outs), (ins i64mem:$dst, i64i32imm:$src),
/// moffs8, moffs16 and moffs32 versions of moves. The immediate is a
/// 32-bit offset from the PC. These are only valid in x86-32 mode.
def MOV8o8a : Ii32 <0xA0, RawFrm, (outs), (ins offset8:$src),
- "mov{b}\t{$src, %al|%al, $src}", []>,
+ "mov{b}\t{$src, %al|AL, $src}", []>,
Requires<[In32BitMode]>;
def MOV16o16a : Ii32 <0xA1, RawFrm, (outs), (ins offset16:$src),
- "mov{w}\t{$src, %ax|%ax, $src}", []>, OpSize,
+ "mov{w}\t{$src, %ax|AL, $src}", []>, OpSize,
Requires<[In32BitMode]>;
def MOV32o32a : Ii32 <0xA1, RawFrm, (outs), (ins offset32:$src),
- "mov{l}\t{$src, %eax|%eax, $src}", []>,
+ "mov{l}\t{$src, %eax|EAX, $src}", []>,
Requires<[In32BitMode]>;
def MOV8ao8 : Ii32 <0xA2, RawFrm, (outs offset8:$dst), (ins),
- "mov{b}\t{%al, $dst|$dst, %al}", []>,
+ "mov{b}\t{%al, $dst|$dst, AL}", []>,
Requires<[In32BitMode]>;
def MOV16ao16 : Ii32 <0xA3, RawFrm, (outs offset16:$dst), (ins),
- "mov{w}\t{%ax, $dst|$dst, %ax}", []>, OpSize,
+ "mov{w}\t{%ax, $dst|$dst, AL}", []>, OpSize,
Requires<[In32BitMode]>;
def MOV32ao32 : Ii32 <0xA3, RawFrm, (outs offset32:$dst), (ins),
- "mov{l}\t{%eax, $dst|$dst, %eax}", []>,
+ "mov{l}\t{%eax, $dst|$dst, EAX}", []>,
Requires<[In32BitMode]>;
// FIXME: These definitions are utterly broken
@@ -865,13 +908,13 @@ def MOV32ao32 : Ii32 <0xA3, RawFrm, (outs offset32:$dst), (ins),
// in question.
/*
def MOV64o8a : RIi8<0xA0, RawFrm, (outs), (ins offset8:$src),
- "mov{q}\t{$src, %rax|%rax, $src}", []>;
+ "mov{q}\t{$src, %rax|RAX, $src}", []>;
def MOV64o64a : RIi32<0xA1, RawFrm, (outs), (ins offset64:$src),
- "mov{q}\t{$src, %rax|%rax, $src}", []>;
+ "mov{q}\t{$src, %rax|RAX, $src}", []>;
def MOV64ao8 : RIi8<0xA2, RawFrm, (outs offset8:$dst), (ins),
- "mov{q}\t{%rax, $dst|$dst, %rax}", []>;
+ "mov{q}\t{%rax, $dst|$dst, RAX}", []>;
def MOV64ao64 : RIi32<0xA3, RawFrm, (outs offset64:$dst), (ins),
- "mov{q}\t{%rax, $dst|$dst, %rax}", []>;
+ "mov{q}\t{%rax, $dst|$dst, RAX}", []>;
*/
@@ -926,7 +969,7 @@ let mayStore = 1 in
def MOV8mr_NOREX : I<0x88, MRMDestMem,
(outs), (ins i8mem_NOREX:$dst, GR8_NOREX:$src),
"mov{b}\t{$src, $dst|$dst, $src} # NOREX", []>;
-let mayLoad = 1,
+let mayLoad = 1, neverHasSideEffects = 1,
canFoldAsLoad = 1, isReMaterializable = 1 in
def MOV8rm_NOREX : I<0x8A, MRMSrcMem,
(outs GR8_NOREX:$dst), (ins i8mem_NOREX:$src),
@@ -1117,11 +1160,15 @@ def XCHG64rr : RI<0x87, MRMSrcReg, (outs GR64:$dst), (ins GR64:$val,GR64:$src),
}
def XCHG16ar : I<0x90, AddRegFrm, (outs), (ins GR16:$src),
- "xchg{w}\t{$src, %ax|%ax, $src}", []>, OpSize;
+ "xchg{w}\t{$src, %ax|AX, $src}", []>, OpSize;
def XCHG32ar : I<0x90, AddRegFrm, (outs), (ins GR32:$src),
- "xchg{l}\t{$src, %eax|%eax, $src}", []>;
+ "xchg{l}\t{$src, %eax|EAX, $src}", []>, Requires<[In32BitMode]>;
+// Uses GR32_NOAX in 64-bit mode to prevent encoding using the 0x90 NOP encoding.
+// xchg %eax, %eax needs to clear upper 32-bits of RAX so is not a NOP.
+def XCHG32ar64 : I<0x90, AddRegFrm, (outs), (ins GR32_NOAX:$src),
+ "xchg{l}\t{$src, %eax|EAX, $src}", []>, Requires<[In64BitMode]>;
def XCHG64ar : RI<0x90, AddRegFrm, (outs), (ins GR64:$src),
- "xchg{q}\t{$src, %rax|%rax, $src}", []>;
+ "xchg{q}\t{$src, %rax|RAX, $src}", []>;
@@ -1172,7 +1219,7 @@ def CMPXCHG8B : I<0xC7, MRM1m, (outs), (ins i64mem:$dst),
let Defs = [RAX, RDX, EFLAGS], Uses = [RAX, RBX, RCX, RDX] in
def CMPXCHG16B : RI<0xC7, MRM1m, (outs), (ins i128mem:$dst),
- "cmpxchg16b\t$dst", []>, TB;
+ "cmpxchg16b\t$dst", []>, TB, Requires<[HasCmpxchg16b]>;
@@ -1261,6 +1308,104 @@ def ARPL16mr : I<0x63, MRMSrcMem, (outs GR16:$src), (ins i16mem:$dst),
"arpl\t{$src, $dst|$dst, $src}", []>, Requires<[In32BitMode]>;
//===----------------------------------------------------------------------===//
+// MOVBE Instructions
+//
+let Predicates = [HasMOVBE] in {
+ def MOVBE16rm : I<0xF0, MRMSrcMem, (outs GR16:$dst), (ins i16mem:$src),
+ "movbe{w}\t{$src, $dst|$dst, $src}",
+ [(set GR16:$dst, (bswap (loadi16 addr:$src)))]>, OpSize, T8;
+ def MOVBE32rm : I<0xF0, MRMSrcMem, (outs GR32:$dst), (ins i32mem:$src),
+ "movbe{l}\t{$src, $dst|$dst, $src}",
+ [(set GR32:$dst, (bswap (loadi32 addr:$src)))]>, T8;
+ def MOVBE64rm : RI<0xF0, MRMSrcMem, (outs GR64:$dst), (ins i64mem:$src),
+ "movbe{q}\t{$src, $dst|$dst, $src}",
+ [(set GR64:$dst, (bswap (loadi64 addr:$src)))]>, T8;
+ def MOVBE16mr : I<0xF1, MRMDestMem, (outs), (ins i16mem:$dst, GR16:$src),
+ "movbe{w}\t{$src, $dst|$dst, $src}",
+ [(store (bswap GR16:$src), addr:$dst)]>, OpSize, T8;
+ def MOVBE32mr : I<0xF1, MRMDestMem, (outs), (ins i32mem:$dst, GR32:$src),
+ "movbe{l}\t{$src, $dst|$dst, $src}",
+ [(store (bswap GR32:$src), addr:$dst)]>, T8;
+ def MOVBE64mr : RI<0xF1, MRMDestMem, (outs), (ins i64mem:$dst, GR64:$src),
+ "movbe{q}\t{$src, $dst|$dst, $src}",
+ [(store (bswap GR64:$src), addr:$dst)]>, T8;
+}
+
+//===----------------------------------------------------------------------===//
+// RDRAND Instruction
+//
+let Predicates = [HasRDRAND], Defs = [EFLAGS] in {
+ def RDRAND16r : I<0xC7, MRM6r, (outs GR16:$dst), (ins),
+ "rdrand{w}\t$dst", []>, OpSize, TB;
+ def RDRAND32r : I<0xC7, MRM6r, (outs GR32:$dst), (ins),
+ "rdrand{l}\t$dst", []>, TB;
+ def RDRAND64r : RI<0xC7, MRM6r, (outs GR64:$dst), (ins),
+ "rdrand{q}\t$dst", []>, TB;
+}
+
+//===----------------------------------------------------------------------===//
+// LZCNT Instruction
+//
+let Predicates = [HasLZCNT], Defs = [EFLAGS] in {
+ def LZCNT16rr : I<0xBD, MRMSrcReg, (outs GR16:$dst), (ins GR16:$src),
+ "lzcnt{w}\t{$src, $dst|$dst, $src}",
+ [(set GR16:$dst, (ctlz GR16:$src)), (implicit EFLAGS)]>, XS,
+ OpSize;
+ def LZCNT16rm : I<0xBD, MRMSrcMem, (outs GR16:$dst), (ins i16mem:$src),
+ "lzcnt{w}\t{$src, $dst|$dst, $src}",
+ [(set GR16:$dst, (ctlz (loadi16 addr:$src))),
+ (implicit EFLAGS)]>, XS, OpSize;
+
+ def LZCNT32rr : I<0xBD, MRMSrcReg, (outs GR32:$dst), (ins GR32:$src),
+ "lzcnt{l}\t{$src, $dst|$dst, $src}",
+ [(set GR32:$dst, (ctlz GR32:$src)), (implicit EFLAGS)]>, XS;
+ def LZCNT32rm : I<0xBD, MRMSrcMem, (outs GR32:$dst), (ins i32mem:$src),
+ "lzcnt{l}\t{$src, $dst|$dst, $src}",
+ [(set GR32:$dst, (ctlz (loadi32 addr:$src))),
+ (implicit EFLAGS)]>, XS;
+
+ def LZCNT64rr : RI<0xBD, MRMSrcReg, (outs GR64:$dst), (ins GR64:$src),
+ "lzcnt{q}\t{$src, $dst|$dst, $src}",
+ [(set GR64:$dst, (ctlz GR64:$src)), (implicit EFLAGS)]>,
+ XS;
+ def LZCNT64rm : RI<0xBD, MRMSrcMem, (outs GR64:$dst), (ins i64mem:$src),
+ "lzcnt{q}\t{$src, $dst|$dst, $src}",
+ [(set GR64:$dst, (ctlz (loadi64 addr:$src))),
+ (implicit EFLAGS)]>, XS;
+}
+
+//===----------------------------------------------------------------------===//
+// TZCNT Instruction
+//
+let Predicates = [HasBMI], Defs = [EFLAGS] in {
+ def TZCNT16rr : I<0xBC, MRMSrcReg, (outs GR16:$dst), (ins GR16:$src),
+ "tzcnt{w}\t{$src, $dst|$dst, $src}",
+ [(set GR16:$dst, (cttz GR16:$src)), (implicit EFLAGS)]>, XS,
+ OpSize;
+ def TZCNT16rm : I<0xBC, MRMSrcMem, (outs GR16:$dst), (ins i16mem:$src),
+ "tzcnt{w}\t{$src, $dst|$dst, $src}",
+ [(set GR16:$dst, (cttz (loadi16 addr:$src))),
+ (implicit EFLAGS)]>, XS, OpSize;
+
+ def TZCNT32rr : I<0xBC, MRMSrcReg, (outs GR32:$dst), (ins GR32:$src),
+ "tzcnt{l}\t{$src, $dst|$dst, $src}",
+ [(set GR32:$dst, (cttz GR32:$src)), (implicit EFLAGS)]>, XS;
+ def TZCNT32rm : I<0xBC, MRMSrcMem, (outs GR32:$dst), (ins i32mem:$src),
+ "tzcnt{l}\t{$src, $dst|$dst, $src}",
+ [(set GR32:$dst, (cttz (loadi32 addr:$src))),
+ (implicit EFLAGS)]>, XS;
+
+ def TZCNT64rr : RI<0xBC, MRMSrcReg, (outs GR64:$dst), (ins GR64:$src),
+ "tzcnt{q}\t{$src, $dst|$dst, $src}",
+ [(set GR64:$dst, (cttz GR64:$src)), (implicit EFLAGS)]>,
+ XS;
+ def TZCNT64rm : RI<0xBC, MRMSrcMem, (outs GR64:$dst), (ins i64mem:$src),
+ "tzcnt{q}\t{$src, $dst|$dst, $src}",
+ [(set GR64:$dst, (cttz (loadi64 addr:$src))),
+ (implicit EFLAGS)]>, XS;
+}
+
+//===----------------------------------------------------------------------===//
// Subsystems.
//===----------------------------------------------------------------------===//
@@ -1646,3 +1791,9 @@ def : InstAlias<"xchgb $mem, $val", (XCHG8rm GR8 :$val, i8mem :$mem)>;
def : InstAlias<"xchgw $mem, $val", (XCHG16rm GR16:$val, i16mem:$mem)>;
def : InstAlias<"xchgl $mem, $val", (XCHG32rm GR32:$val, i32mem:$mem)>;
def : InstAlias<"xchgq $mem, $val", (XCHG64rm GR64:$val, i64mem:$mem)>;
+
+// xchg: We accept "xchgX <reg>, %eax" and "xchgX %eax, <reg>" as synonyms.
+def : InstAlias<"xchgw %ax, $src", (XCHG16ar GR16:$src)>;
+def : InstAlias<"xchgl %eax, $src", (XCHG32ar GR32:$src)>, Requires<[In32BitMode]>;
+def : InstAlias<"xchgl %eax, $src", (XCHG32ar64 GR32_NOAX:$src)>, Requires<[In64BitMode]>;
+def : InstAlias<"xchgq %rax, $src", (XCHG64ar GR64:$src)>;
diff --git a/contrib/llvm/lib/Target/X86/X86InstrSSE.td b/contrib/llvm/lib/Target/X86/X86InstrSSE.td
index fe11d77..d3ced23 100644
--- a/contrib/llvm/lib/Target/X86/X86InstrSSE.td
+++ b/contrib/llvm/lib/Target/X86/X86InstrSSE.td
@@ -116,7 +116,217 @@ multiclass sse12_fp_packed_int<bits<8> opc, string OpcodeStr, RegisterClass RC,
}
//===----------------------------------------------------------------------===//
-// SSE 1 & 2 - Move Instructions
+// Non-instruction patterns
+//===----------------------------------------------------------------------===//
+
+// A vector extract of the first f32/f64 position is a subregister copy
+def : Pat<(f32 (vector_extract (v4f32 VR128:$src), (iPTR 0))),
+ (f32 (EXTRACT_SUBREG (v4f32 VR128:$src), sub_ss))>;
+def : Pat<(f64 (vector_extract (v2f64 VR128:$src), (iPTR 0))),
+ (f64 (EXTRACT_SUBREG (v2f64 VR128:$src), sub_sd))>;
+
+// A 128-bit subvector extract from the first 256-bit vector position
+// is a subregister copy that needs no instruction.
+def : Pat<(v4i32 (extract_subvector (v8i32 VR256:$src), (i32 0))),
+ (v4i32 (EXTRACT_SUBREG (v8i32 VR256:$src), sub_xmm))>;
+def : Pat<(v4f32 (extract_subvector (v8f32 VR256:$src), (i32 0))),
+ (v4f32 (EXTRACT_SUBREG (v8f32 VR256:$src), sub_xmm))>;
+
+def : Pat<(v2i64 (extract_subvector (v4i64 VR256:$src), (i32 0))),
+ (v2i64 (EXTRACT_SUBREG (v4i64 VR256:$src), sub_xmm))>;
+def : Pat<(v2f64 (extract_subvector (v4f64 VR256:$src), (i32 0))),
+ (v2f64 (EXTRACT_SUBREG (v4f64 VR256:$src), sub_xmm))>;
+
+def : Pat<(v8i16 (extract_subvector (v16i16 VR256:$src), (i32 0))),
+ (v8i16 (EXTRACT_SUBREG (v16i16 VR256:$src), sub_xmm))>;
+def : Pat<(v16i8 (extract_subvector (v32i8 VR256:$src), (i32 0))),
+ (v16i8 (EXTRACT_SUBREG (v32i8 VR256:$src), sub_xmm))>;
+
+// A 128-bit subvector insert to the first 256-bit vector position
+// is a subregister copy that needs no instruction.
+def : Pat<(insert_subvector undef, (v2i64 VR128:$src), (i32 0)),
+ (INSERT_SUBREG (v4i64 (IMPLICIT_DEF)), VR128:$src, sub_xmm)>;
+def : Pat<(insert_subvector undef, (v2f64 VR128:$src), (i32 0)),
+ (INSERT_SUBREG (v4f64 (IMPLICIT_DEF)), VR128:$src, sub_xmm)>;
+def : Pat<(insert_subvector undef, (v4i32 VR128:$src), (i32 0)),
+ (INSERT_SUBREG (v8i32 (IMPLICIT_DEF)), VR128:$src, sub_xmm)>;
+def : Pat<(insert_subvector undef, (v4f32 VR128:$src), (i32 0)),
+ (INSERT_SUBREG (v8f32 (IMPLICIT_DEF)), VR128:$src, sub_xmm)>;
+def : Pat<(insert_subvector undef, (v8i16 VR128:$src), (i32 0)),
+ (INSERT_SUBREG (v16i16 (IMPLICIT_DEF)), VR128:$src, sub_xmm)>;
+def : Pat<(insert_subvector undef, (v16i8 VR128:$src), (i32 0)),
+ (INSERT_SUBREG (v32i8 (IMPLICIT_DEF)), VR128:$src, sub_xmm)>;
+
+// Implicitly promote a 32-bit scalar to a vector.
+def : Pat<(v4f32 (scalar_to_vector FR32:$src)),
+ (INSERT_SUBREG (v4f32 (IMPLICIT_DEF)), FR32:$src, sub_ss)>;
+def : Pat<(v8f32 (scalar_to_vector FR32:$src)),
+ (INSERT_SUBREG (v8f32 (IMPLICIT_DEF)), FR32:$src, sub_ss)>;
+// Implicitly promote a 64-bit scalar to a vector.
+def : Pat<(v2f64 (scalar_to_vector FR64:$src)),
+ (INSERT_SUBREG (v2f64 (IMPLICIT_DEF)), FR64:$src, sub_sd)>;
+def : Pat<(v4f64 (scalar_to_vector FR64:$src)),
+ (INSERT_SUBREG (v4f64 (IMPLICIT_DEF)), FR64:$src, sub_sd)>;
+
+// Bitcasts between 128-bit vector types. Return the original type since
+// no instruction is needed for the conversion
+let Predicates = [HasXMMInt] in {
+ def : Pat<(v2i64 (bitconvert (v4i32 VR128:$src))), (v2i64 VR128:$src)>;
+ def : Pat<(v2i64 (bitconvert (v8i16 VR128:$src))), (v2i64 VR128:$src)>;
+ def : Pat<(v2i64 (bitconvert (v16i8 VR128:$src))), (v2i64 VR128:$src)>;
+ def : Pat<(v2i64 (bitconvert (v2f64 VR128:$src))), (v2i64 VR128:$src)>;
+ def : Pat<(v2i64 (bitconvert (v4f32 VR128:$src))), (v2i64 VR128:$src)>;
+ def : Pat<(v4i32 (bitconvert (v2i64 VR128:$src))), (v4i32 VR128:$src)>;
+ def : Pat<(v4i32 (bitconvert (v8i16 VR128:$src))), (v4i32 VR128:$src)>;
+ def : Pat<(v4i32 (bitconvert (v16i8 VR128:$src))), (v4i32 VR128:$src)>;
+ def : Pat<(v4i32 (bitconvert (v2f64 VR128:$src))), (v4i32 VR128:$src)>;
+ def : Pat<(v4i32 (bitconvert (v4f32 VR128:$src))), (v4i32 VR128:$src)>;
+ def : Pat<(v8i16 (bitconvert (v2i64 VR128:$src))), (v8i16 VR128:$src)>;
+ def : Pat<(v8i16 (bitconvert (v4i32 VR128:$src))), (v8i16 VR128:$src)>;
+ def : Pat<(v8i16 (bitconvert (v16i8 VR128:$src))), (v8i16 VR128:$src)>;
+ def : Pat<(v8i16 (bitconvert (v2f64 VR128:$src))), (v8i16 VR128:$src)>;
+ def : Pat<(v8i16 (bitconvert (v4f32 VR128:$src))), (v8i16 VR128:$src)>;
+ def : Pat<(v16i8 (bitconvert (v2i64 VR128:$src))), (v16i8 VR128:$src)>;
+ def : Pat<(v16i8 (bitconvert (v4i32 VR128:$src))), (v16i8 VR128:$src)>;
+ def : Pat<(v16i8 (bitconvert (v8i16 VR128:$src))), (v16i8 VR128:$src)>;
+ def : Pat<(v16i8 (bitconvert (v2f64 VR128:$src))), (v16i8 VR128:$src)>;
+ def : Pat<(v16i8 (bitconvert (v4f32 VR128:$src))), (v16i8 VR128:$src)>;
+ def : Pat<(v4f32 (bitconvert (v2i64 VR128:$src))), (v4f32 VR128:$src)>;
+ def : Pat<(v4f32 (bitconvert (v4i32 VR128:$src))), (v4f32 VR128:$src)>;
+ def : Pat<(v4f32 (bitconvert (v8i16 VR128:$src))), (v4f32 VR128:$src)>;
+ def : Pat<(v4f32 (bitconvert (v16i8 VR128:$src))), (v4f32 VR128:$src)>;
+ def : Pat<(v4f32 (bitconvert (v2f64 VR128:$src))), (v4f32 VR128:$src)>;
+ def : Pat<(v2f64 (bitconvert (v2i64 VR128:$src))), (v2f64 VR128:$src)>;
+ def : Pat<(v2f64 (bitconvert (v4i32 VR128:$src))), (v2f64 VR128:$src)>;
+ def : Pat<(v2f64 (bitconvert (v8i16 VR128:$src))), (v2f64 VR128:$src)>;
+ def : Pat<(v2f64 (bitconvert (v16i8 VR128:$src))), (v2f64 VR128:$src)>;
+ def : Pat<(v2f64 (bitconvert (v4f32 VR128:$src))), (v2f64 VR128:$src)>;
+}
+
+// Bitcasts between 256-bit vector types. Return the original type since
+// no instruction is needed for the conversion
+let Predicates = [HasAVX] in {
+ def : Pat<(v4f64 (bitconvert (v8f32 VR256:$src))), (v4f64 VR256:$src)>;
+ def : Pat<(v4f64 (bitconvert (v8i32 VR256:$src))), (v4f64 VR256:$src)>;
+ def : Pat<(v4f64 (bitconvert (v4i64 VR256:$src))), (v4f64 VR256:$src)>;
+ def : Pat<(v4f64 (bitconvert (v16i16 VR256:$src))), (v4f64 VR256:$src)>;
+ def : Pat<(v4f64 (bitconvert (v32i8 VR256:$src))), (v4f64 VR256:$src)>;
+ def : Pat<(v8f32 (bitconvert (v8i32 VR256:$src))), (v8f32 VR256:$src)>;
+ def : Pat<(v8f32 (bitconvert (v4i64 VR256:$src))), (v8f32 VR256:$src)>;
+ def : Pat<(v8f32 (bitconvert (v4f64 VR256:$src))), (v8f32 VR256:$src)>;
+ def : Pat<(v8f32 (bitconvert (v32i8 VR256:$src))), (v8f32 VR256:$src)>;
+ def : Pat<(v8f32 (bitconvert (v16i16 VR256:$src))), (v8f32 VR256:$src)>;
+ def : Pat<(v4i64 (bitconvert (v8f32 VR256:$src))), (v4i64 VR256:$src)>;
+ def : Pat<(v4i64 (bitconvert (v8i32 VR256:$src))), (v4i64 VR256:$src)>;
+ def : Pat<(v4i64 (bitconvert (v4f64 VR256:$src))), (v4i64 VR256:$src)>;
+ def : Pat<(v4i64 (bitconvert (v32i8 VR256:$src))), (v4i64 VR256:$src)>;
+ def : Pat<(v4i64 (bitconvert (v16i16 VR256:$src))), (v4i64 VR256:$src)>;
+ def : Pat<(v32i8 (bitconvert (v4f64 VR256:$src))), (v32i8 VR256:$src)>;
+ def : Pat<(v32i8 (bitconvert (v4i64 VR256:$src))), (v32i8 VR256:$src)>;
+ def : Pat<(v32i8 (bitconvert (v8f32 VR256:$src))), (v32i8 VR256:$src)>;
+ def : Pat<(v32i8 (bitconvert (v8i32 VR256:$src))), (v32i8 VR256:$src)>;
+ def : Pat<(v32i8 (bitconvert (v16i16 VR256:$src))), (v32i8 VR256:$src)>;
+ def : Pat<(v8i32 (bitconvert (v32i8 VR256:$src))), (v8i32 VR256:$src)>;
+ def : Pat<(v8i32 (bitconvert (v16i16 VR256:$src))), (v8i32 VR256:$src)>;
+ def : Pat<(v8i32 (bitconvert (v8f32 VR256:$src))), (v8i32 VR256:$src)>;
+ def : Pat<(v8i32 (bitconvert (v4i64 VR256:$src))), (v8i32 VR256:$src)>;
+ def : Pat<(v8i32 (bitconvert (v4f64 VR256:$src))), (v8i32 VR256:$src)>;
+ def : Pat<(v16i16 (bitconvert (v8f32 VR256:$src))), (v16i16 VR256:$src)>;
+ def : Pat<(v16i16 (bitconvert (v8i32 VR256:$src))), (v16i16 VR256:$src)>;
+ def : Pat<(v16i16 (bitconvert (v4i64 VR256:$src))), (v16i16 VR256:$src)>;
+ def : Pat<(v16i16 (bitconvert (v4f64 VR256:$src))), (v16i16 VR256:$src)>;
+ def : Pat<(v16i16 (bitconvert (v32i8 VR256:$src))), (v16i16 VR256:$src)>;
+}
+
+// Alias instructions that map fld0 to pxor for sse.
+// FIXME: Set encoding to pseudo!
+let isReMaterializable = 1, isAsCheapAsAMove = 1, isCodeGenOnly = 1,
+ canFoldAsLoad = 1 in {
+ def FsFLD0SS : I<0xEF, MRMInitReg, (outs FR32:$dst), (ins), "",
+ [(set FR32:$dst, fp32imm0)]>,
+ Requires<[HasSSE1]>, TB, OpSize;
+ def FsFLD0SD : I<0xEF, MRMInitReg, (outs FR64:$dst), (ins), "",
+ [(set FR64:$dst, fpimm0)]>,
+ Requires<[HasSSE2]>, TB, OpSize;
+ def VFsFLD0SS : I<0xEF, MRMInitReg, (outs FR32:$dst), (ins), "",
+ [(set FR32:$dst, fp32imm0)]>,
+ Requires<[HasAVX]>, TB, OpSize, VEX_4V;
+ def VFsFLD0SD : I<0xEF, MRMInitReg, (outs FR64:$dst), (ins), "",
+ [(set FR64:$dst, fpimm0)]>,
+ Requires<[HasAVX]>, TB, OpSize, VEX_4V;
+}
+
+//===----------------------------------------------------------------------===//
+// AVX & SSE - Zero/One Vectors
+//===----------------------------------------------------------------------===//
+
+// Alias instruction that maps zero vector to pxor / xorp* for sse.
+// This is expanded by ExpandPostRAPseudos to an xorps / vxorps, and then
+// swizzled by ExecutionDepsFix to pxor.
+// We set canFoldAsLoad because this can be converted to a constant-pool
+// load of an all-zeros value if folding it would be beneficial.
+let isReMaterializable = 1, isAsCheapAsAMove = 1, canFoldAsLoad = 1,
+ isPseudo = 1, neverHasSideEffects = 1 in {
+def V_SET0 : I<0, Pseudo, (outs VR128:$dst), (ins), "", []>;
+}
+
+def : Pat<(v4f32 immAllZerosV), (V_SET0)>;
+def : Pat<(v2f64 immAllZerosV), (V_SET0)>;
+def : Pat<(v4i32 immAllZerosV), (V_SET0)>;
+def : Pat<(v2i64 immAllZerosV), (V_SET0)>;
+def : Pat<(v8i16 immAllZerosV), (V_SET0)>;
+def : Pat<(v16i8 immAllZerosV), (V_SET0)>;
+
+
+// The same as done above but for AVX. The 256-bit ISA does not support PI,
+// and doesn't need it because on sandy bridge the register is set to zero
+// at the rename stage without using any execution unit, so SET0PSY
+// and SET0PDY can be used for vector int instructions without penalty
+// FIXME: Change encoding to pseudo! This is blocked right now by the x86
+// JIT implementatioan, it does not expand the instructions below like
+// X86MCInstLower does.
+let isReMaterializable = 1, isAsCheapAsAMove = 1, canFoldAsLoad = 1,
+ isCodeGenOnly = 1, Predicates = [HasAVX] in {
+def AVX_SET0PSY : PSI<0x57, MRMInitReg, (outs VR256:$dst), (ins), "",
+ [(set VR256:$dst, (v8f32 immAllZerosV))]>, VEX_4V;
+def AVX_SET0PDY : PDI<0x57, MRMInitReg, (outs VR256:$dst), (ins), "",
+ [(set VR256:$dst, (v4f64 immAllZerosV))]>, VEX_4V;
+}
+
+
+// AVX has no support for 256-bit integer instructions, but since the 128-bit
+// VPXOR instruction writes zero to its upper part, it's safe build zeros.
+def : Pat<(v8i32 immAllZerosV), (SUBREG_TO_REG (i32 0), (V_SET0), sub_xmm)>;
+def : Pat<(bc_v8i32 (v8f32 immAllZerosV)),
+ (SUBREG_TO_REG (i32 0), (V_SET0), sub_xmm)>;
+
+def : Pat<(v4i64 immAllZerosV), (SUBREG_TO_REG (i64 0), (V_SET0), sub_xmm)>;
+def : Pat<(bc_v4i64 (v8f32 immAllZerosV)),
+ (SUBREG_TO_REG (i64 0), (V_SET0), sub_xmm)>;
+
+// We set canFoldAsLoad because this can be converted to a constant-pool
+// load of an all-ones value if folding it would be beneficial.
+// FIXME: Change encoding to pseudo! This is blocked right now by the x86
+// JIT implementation, it does not expand the instructions below like
+// X86MCInstLower does.
+let isReMaterializable = 1, isAsCheapAsAMove = 1, canFoldAsLoad = 1,
+ isCodeGenOnly = 1, ExeDomain = SSEPackedInt in
+ def V_SETALLONES : PDI<0x76, MRMInitReg, (outs VR128:$dst), (ins), "",
+ [(set VR128:$dst, (v4i32 immAllOnesV))]>;
+let isReMaterializable = 1, isAsCheapAsAMove = 1, canFoldAsLoad = 1,
+ isCodeGenOnly = 1, ExeDomain = SSEPackedInt, Predicates = [HasAVX] in
+ def AVX_SETALLONES : PDI<0x76, MRMInitReg, (outs VR128:$dst), (ins), "",
+ [(set VR128:$dst, (v4i32 immAllOnesV))]>, VEX_4V;
+
+
+//===----------------------------------------------------------------------===//
+// SSE 1 & 2 - Move FP Scalar Instructions
+//
+// Move Instructions. Register-to-register movss/movsd is not used for FR32/64
+// register copies because it's a partial register update; FsMOVAPSrr/FsMOVAPDrr
+// is used instead. Register-to-register movss/movsd is not modeled as an
+// INSERT_SUBREG because INSERT_SUBREG requires that the insert be implementable
+// in terms of a copy, and just mentioned, we don't use movss/movsd for copies.
//===----------------------------------------------------------------------===//
class sse12_move_rr<RegisterClass RC, ValueType vt, string asm> :
@@ -130,28 +340,57 @@ class sse12_move_rm<RegisterClass RC, X86MemOperand x86memop,
!strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
[(set RC:$dst, (mem_pat addr:$src))]>;
-// Move Instructions. Register-to-register movss/movsd is not used for FR32/64
-// register copies because it's a partial register update; FsMOVAPSrr/FsMOVAPDrr
-// is used instead. Register-to-register movss/movsd is not modeled as an
-// INSERT_SUBREG because INSERT_SUBREG requires that the insert be implementable
-// in terms of a copy, and just mentioned, we don't use movss/movsd for copies.
+// AVX
def VMOVSSrr : sse12_move_rr<FR32, v4f32,
- "movss\t{$src2, $src1, $dst|$dst, $src1, $src2}">, XS, VEX_4V;
+ "movss\t{$src2, $src1, $dst|$dst, $src1, $src2}">, XS, VEX_4V,
+ VEX_LIG;
def VMOVSDrr : sse12_move_rr<FR64, v2f64,
- "movsd\t{$src2, $src1, $dst|$dst, $src1, $src2}">, XD, VEX_4V;
+ "movsd\t{$src2, $src1, $dst|$dst, $src1, $src2}">, XD, VEX_4V,
+ VEX_LIG;
-let canFoldAsLoad = 1, isReMaterializable = 1 in {
- def VMOVSSrm : sse12_move_rm<FR32, f32mem, loadf32, "movss">, XS, VEX;
+// For the disassembler
+let isCodeGenOnly = 1 in {
+ def VMOVSSrr_REV : SI<0x11, MRMDestReg, (outs VR128:$dst),
+ (ins VR128:$src1, FR32:$src2),
+ "movss\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>,
+ XS, VEX_4V, VEX_LIG;
+ def VMOVSDrr_REV : SI<0x11, MRMDestReg, (outs VR128:$dst),
+ (ins VR128:$src1, FR64:$src2),
+ "movsd\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>,
+ XD, VEX_4V, VEX_LIG;
+}
+let canFoldAsLoad = 1, isReMaterializable = 1 in {
+ def VMOVSSrm : sse12_move_rm<FR32, f32mem, loadf32, "movss">, XS, VEX,
+ VEX_LIG;
let AddedComplexity = 20 in
- def VMOVSDrm : sse12_move_rm<FR64, f64mem, loadf64, "movsd">, XD, VEX;
+ def VMOVSDrm : sse12_move_rm<FR64, f64mem, loadf64, "movsd">, XD, VEX,
+ VEX_LIG;
}
+def VMOVSSmr : SI<0x11, MRMDestMem, (outs), (ins f32mem:$dst, FR32:$src),
+ "movss\t{$src, $dst|$dst, $src}",
+ [(store FR32:$src, addr:$dst)]>, XS, VEX, VEX_LIG;
+def VMOVSDmr : SI<0x11, MRMDestMem, (outs), (ins f64mem:$dst, FR64:$src),
+ "movsd\t{$src, $dst|$dst, $src}",
+ [(store FR64:$src, addr:$dst)]>, XD, VEX, VEX_LIG;
+
+// SSE1 & 2
let Constraints = "$src1 = $dst" in {
def MOVSSrr : sse12_move_rr<FR32, v4f32,
"movss\t{$src2, $dst|$dst, $src2}">, XS;
def MOVSDrr : sse12_move_rr<FR64, v2f64,
"movsd\t{$src2, $dst|$dst, $src2}">, XD;
+
+ // For the disassembler
+ let isCodeGenOnly = 1 in {
+ def MOVSSrr_REV : SI<0x11, MRMDestReg, (outs VR128:$dst),
+ (ins VR128:$src1, FR32:$src2),
+ "movss\t{$src2, $dst|$dst, $src2}", []>, XS;
+ def MOVSDrr_REV : SI<0x11, MRMDestReg, (outs VR128:$dst),
+ (ins VR128:$src1, FR64:$src2),
+ "movsd\t{$src2, $dst|$dst, $src2}", []>, XD;
+ }
}
let canFoldAsLoad = 1, isReMaterializable = 1 in {
@@ -161,54 +400,6 @@ let canFoldAsLoad = 1, isReMaterializable = 1 in {
def MOVSDrm : sse12_move_rm<FR64, f64mem, loadf64, "movsd">, XD;
}
-let AddedComplexity = 15 in {
-// Extract the low 32-bit value from one vector and insert it into another.
-def : Pat<(v4f32 (movl VR128:$src1, VR128:$src2)),
- (MOVSSrr (v4f32 VR128:$src1),
- (EXTRACT_SUBREG (v4f32 VR128:$src2), sub_ss))>;
-// Extract the low 64-bit value from one vector and insert it into another.
-def : Pat<(v2f64 (movl VR128:$src1, VR128:$src2)),
- (MOVSDrr (v2f64 VR128:$src1),
- (EXTRACT_SUBREG (v2f64 VR128:$src2), sub_sd))>;
-}
-
-// Implicitly promote a 32-bit scalar to a vector.
-def : Pat<(v4f32 (scalar_to_vector FR32:$src)),
- (INSERT_SUBREG (v4f32 (IMPLICIT_DEF)), FR32:$src, sub_ss)>;
-// Implicitly promote a 64-bit scalar to a vector.
-def : Pat<(v2f64 (scalar_to_vector FR64:$src)),
- (INSERT_SUBREG (v2f64 (IMPLICIT_DEF)), FR64:$src, sub_sd)>;
-// Implicitly promote a 32-bit scalar to a vector.
-def : Pat<(v8f32 (scalar_to_vector FR32:$src)),
- (INSERT_SUBREG (v8f32 (IMPLICIT_DEF)), FR32:$src, sub_ss)>;
-// Implicitly promote a 64-bit scalar to a vector.
-def : Pat<(v4f64 (scalar_to_vector FR64:$src)),
- (INSERT_SUBREG (v4f64 (IMPLICIT_DEF)), FR64:$src, sub_sd)>;
-
-let AddedComplexity = 20 in {
-// MOVSSrm zeros the high parts of the register; represent this
-// with SUBREG_TO_REG.
-def : Pat<(v4f32 (X86vzmovl (v4f32 (scalar_to_vector (loadf32 addr:$src))))),
- (SUBREG_TO_REG (i32 0), (MOVSSrm addr:$src), sub_ss)>;
-def : Pat<(v4f32 (scalar_to_vector (loadf32 addr:$src))),
- (SUBREG_TO_REG (i32 0), (MOVSSrm addr:$src), sub_ss)>;
-def : Pat<(v4f32 (X86vzmovl (loadv4f32 addr:$src))),
- (SUBREG_TO_REG (i32 0), (MOVSSrm addr:$src), sub_ss)>;
-// MOVSDrm zeros the high parts of the register; represent this
-// with SUBREG_TO_REG.
-def : Pat<(v2f64 (X86vzmovl (v2f64 (scalar_to_vector (loadf64 addr:$src))))),
- (SUBREG_TO_REG (i64 0), (MOVSDrm addr:$src), sub_sd)>;
-def : Pat<(v2f64 (scalar_to_vector (loadf64 addr:$src))),
- (SUBREG_TO_REG (i64 0), (MOVSDrm addr:$src), sub_sd)>;
-def : Pat<(v2f64 (X86vzmovl (loadv2f64 addr:$src))),
- (SUBREG_TO_REG (i64 0), (MOVSDrm addr:$src), sub_sd)>;
-def : Pat<(v2f64 (X86vzmovl (bc_v2f64 (loadv4f32 addr:$src)))),
- (SUBREG_TO_REG (i64 0), (MOVSDrm addr:$src), sub_sd)>;
-def : Pat<(v2f64 (X86vzload addr:$src)),
- (SUBREG_TO_REG (i64 0), (MOVSDrm addr:$src), sub_sd)>;
-}
-
-// Store scalar value to memory.
def MOVSSmr : SSI<0x11, MRMDestMem, (outs), (ins f32mem:$dst, FR32:$src),
"movss\t{$src, $dst|$dst, $src}",
[(store FR32:$src, addr:$dst)]>;
@@ -216,24 +407,257 @@ def MOVSDmr : SDI<0x11, MRMDestMem, (outs), (ins f64mem:$dst, FR64:$src),
"movsd\t{$src, $dst|$dst, $src}",
[(store FR64:$src, addr:$dst)]>;
-def VMOVSSmr : SI<0x11, MRMDestMem, (outs), (ins f32mem:$dst, FR32:$src),
- "movss\t{$src, $dst|$dst, $src}",
- [(store FR32:$src, addr:$dst)]>, XS, VEX;
-def VMOVSDmr : SI<0x11, MRMDestMem, (outs), (ins f64mem:$dst, FR64:$src),
- "movsd\t{$src, $dst|$dst, $src}",
- [(store FR64:$src, addr:$dst)]>, XD, VEX;
+// Patterns
+let Predicates = [HasSSE1] in {
+ let AddedComplexity = 15 in {
+ // Extract the low 32-bit value from one vector and insert it into another.
+ def : Pat<(v4f32 (movl VR128:$src1, VR128:$src2)),
+ (MOVSSrr (v4f32 VR128:$src1),
+ (EXTRACT_SUBREG (v4f32 VR128:$src2), sub_ss))>;
+ def : Pat<(v4i32 (movl VR128:$src1, VR128:$src2)),
+ (MOVSSrr (v4i32 VR128:$src1),
+ (EXTRACT_SUBREG (v4i32 VR128:$src2), sub_ss))>;
+
+ // Move scalar to XMM zero-extended, zeroing a VR128 then do a
+ // MOVSS to the lower bits.
+ def : Pat<(v4f32 (X86vzmovl (v4f32 (scalar_to_vector FR32:$src)))),
+ (MOVSSrr (v4f32 (V_SET0)), FR32:$src)>;
+ def : Pat<(v4f32 (X86vzmovl (v4f32 VR128:$src))),
+ (MOVSSrr (v4f32 (V_SET0)),
+ (f32 (EXTRACT_SUBREG (v4f32 VR128:$src), sub_ss)))>;
+ def : Pat<(v4i32 (X86vzmovl (v4i32 VR128:$src))),
+ (MOVSSrr (v4i32 (V_SET0)),
+ (EXTRACT_SUBREG (v4i32 VR128:$src), sub_ss))>;
+ }
-// Extract and store.
-def : Pat<(store (f32 (vector_extract (v4f32 VR128:$src), (iPTR 0))),
- addr:$dst),
- (MOVSSmr addr:$dst,
- (EXTRACT_SUBREG (v4f32 VR128:$src), sub_ss))>;
-def : Pat<(store (f64 (vector_extract (v2f64 VR128:$src), (iPTR 0))),
- addr:$dst),
- (MOVSDmr addr:$dst,
- (EXTRACT_SUBREG (v2f64 VR128:$src), sub_sd))>;
+ let AddedComplexity = 20 in {
+ // MOVSSrm zeros the high parts of the register; represent this
+ // with SUBREG_TO_REG.
+ def : Pat<(v4f32 (X86vzmovl (v4f32 (scalar_to_vector (loadf32 addr:$src))))),
+ (SUBREG_TO_REG (i32 0), (MOVSSrm addr:$src), sub_ss)>;
+ def : Pat<(v4f32 (scalar_to_vector (loadf32 addr:$src))),
+ (SUBREG_TO_REG (i32 0), (MOVSSrm addr:$src), sub_ss)>;
+ def : Pat<(v4f32 (X86vzmovl (loadv4f32 addr:$src))),
+ (SUBREG_TO_REG (i32 0), (MOVSSrm addr:$src), sub_ss)>;
+ }
+
+ // Extract and store.
+ def : Pat<(store (f32 (vector_extract (v4f32 VR128:$src), (iPTR 0))),
+ addr:$dst),
+ (MOVSSmr addr:$dst,
+ (EXTRACT_SUBREG (v4f32 VR128:$src), sub_ss))>;
+
+ // Shuffle with MOVSS
+ def : Pat<(v4f32 (X86Movss VR128:$src1, (scalar_to_vector FR32:$src2))),
+ (MOVSSrr VR128:$src1, FR32:$src2)>;
+ def : Pat<(v4i32 (X86Movss VR128:$src1, VR128:$src2)),
+ (MOVSSrr (v4i32 VR128:$src1),
+ (EXTRACT_SUBREG (v4i32 VR128:$src2), sub_ss))>;
+ def : Pat<(v4f32 (X86Movss VR128:$src1, VR128:$src2)),
+ (MOVSSrr (v4f32 VR128:$src1),
+ (EXTRACT_SUBREG (v4f32 VR128:$src2), sub_ss))>;
+}
+
+let Predicates = [HasSSE2] in {
+ let AddedComplexity = 15 in {
+ // Extract the low 64-bit value from one vector and insert it into another.
+ def : Pat<(v2f64 (movl VR128:$src1, VR128:$src2)),
+ (MOVSDrr (v2f64 VR128:$src1),
+ (EXTRACT_SUBREG (v2f64 VR128:$src2), sub_sd))>;
+ def : Pat<(v2i64 (movl VR128:$src1, VR128:$src2)),
+ (MOVSDrr (v2i64 VR128:$src1),
+ (EXTRACT_SUBREG (v2i64 VR128:$src2), sub_sd))>;
+
+ // vector_shuffle v1, v2 <4, 5, 2, 3> using movsd
+ def : Pat<(v4f32 (movlp VR128:$src1, VR128:$src2)),
+ (MOVSDrr VR128:$src1, (EXTRACT_SUBREG VR128:$src2, sub_sd))>;
+ def : Pat<(v4i32 (movlp VR128:$src1, VR128:$src2)),
+ (MOVSDrr VR128:$src1, (EXTRACT_SUBREG VR128:$src2, sub_sd))>;
+
+ // Move scalar to XMM zero-extended, zeroing a VR128 then do a
+ // MOVSD to the lower bits.
+ def : Pat<(v2f64 (X86vzmovl (v2f64 (scalar_to_vector FR64:$src)))),
+ (MOVSDrr (v2f64 (V_SET0)), FR64:$src)>;
+ }
+
+ let AddedComplexity = 20 in {
+ // MOVSDrm zeros the high parts of the register; represent this
+ // with SUBREG_TO_REG.
+ def : Pat<(v2f64 (X86vzmovl (v2f64 (scalar_to_vector (loadf64 addr:$src))))),
+ (SUBREG_TO_REG (i64 0), (MOVSDrm addr:$src), sub_sd)>;
+ def : Pat<(v2f64 (scalar_to_vector (loadf64 addr:$src))),
+ (SUBREG_TO_REG (i64 0), (MOVSDrm addr:$src), sub_sd)>;
+ def : Pat<(v2f64 (X86vzmovl (loadv2f64 addr:$src))),
+ (SUBREG_TO_REG (i64 0), (MOVSDrm addr:$src), sub_sd)>;
+ def : Pat<(v2f64 (X86vzmovl (bc_v2f64 (loadv4f32 addr:$src)))),
+ (SUBREG_TO_REG (i64 0), (MOVSDrm addr:$src), sub_sd)>;
+ def : Pat<(v2f64 (X86vzload addr:$src)),
+ (SUBREG_TO_REG (i64 0), (MOVSDrm addr:$src), sub_sd)>;
+ }
+
+ // Extract and store.
+ def : Pat<(store (f64 (vector_extract (v2f64 VR128:$src), (iPTR 0))),
+ addr:$dst),
+ (MOVSDmr addr:$dst,
+ (EXTRACT_SUBREG (v2f64 VR128:$src), sub_sd))>;
+
+ // Shuffle with MOVSD
+ def : Pat<(v2f64 (X86Movsd VR128:$src1, (scalar_to_vector FR64:$src2))),
+ (MOVSDrr VR128:$src1, FR64:$src2)>;
+ def : Pat<(v2i64 (X86Movsd VR128:$src1, VR128:$src2)),
+ (MOVSDrr (v2i64 VR128:$src1),
+ (EXTRACT_SUBREG (v2i64 VR128:$src2), sub_sd))>;
+ def : Pat<(v2f64 (X86Movsd VR128:$src1, VR128:$src2)),
+ (MOVSDrr (v2f64 VR128:$src1),
+ (EXTRACT_SUBREG (v2f64 VR128:$src2), sub_sd))>;
+ def : Pat<(v4f32 (X86Movsd VR128:$src1, VR128:$src2)),
+ (MOVSDrr VR128:$src1, (EXTRACT_SUBREG (v4f32 VR128:$src2),sub_sd))>;
+ def : Pat<(v4i32 (X86Movsd VR128:$src1, VR128:$src2)),
+ (MOVSDrr VR128:$src1, (EXTRACT_SUBREG (v4i32 VR128:$src2),sub_sd))>;
+
+ // FIXME: Instead of a X86Movlps there should be a X86Movsd here, the problem
+ // is during lowering, where it's not possible to recognize the fold cause
+ // it has two uses through a bitcast. One use disappears at isel time and the
+ // fold opportunity reappears.
+ def : Pat<(v4f32 (X86Movlps VR128:$src1, VR128:$src2)),
+ (MOVSDrr VR128:$src1, (EXTRACT_SUBREG (v4f32 VR128:$src2),sub_sd))>;
+ def : Pat<(v4i32 (X86Movlps VR128:$src1, VR128:$src2)),
+ (MOVSDrr VR128:$src1, (EXTRACT_SUBREG (v4i32 VR128:$src2),sub_sd))>;
+}
+
+let Predicates = [HasAVX] in {
+ let AddedComplexity = 15 in {
+ // Extract the low 32-bit value from one vector and insert it into another.
+ def : Pat<(v4f32 (movl VR128:$src1, VR128:$src2)),
+ (VMOVSSrr (v4f32 VR128:$src1),
+ (EXTRACT_SUBREG (v4f32 VR128:$src2), sub_ss))>;
+ def : Pat<(v4i32 (movl VR128:$src1, VR128:$src2)),
+ (VMOVSSrr (v4i32 VR128:$src1),
+ (EXTRACT_SUBREG (v4i32 VR128:$src2), sub_ss))>;
+
+ // Extract the low 64-bit value from one vector and insert it into another.
+ def : Pat<(v2f64 (movl VR128:$src1, VR128:$src2)),
+ (VMOVSDrr (v2f64 VR128:$src1),
+ (EXTRACT_SUBREG (v2f64 VR128:$src2), sub_sd))>;
+ def : Pat<(v2i64 (movl VR128:$src1, VR128:$src2)),
+ (VMOVSDrr (v2i64 VR128:$src1),
+ (EXTRACT_SUBREG (v2i64 VR128:$src2), sub_sd))>;
+
+ // vector_shuffle v1, v2 <4, 5, 2, 3> using movsd
+ def : Pat<(v4f32 (movlp VR128:$src1, VR128:$src2)),
+ (VMOVSDrr VR128:$src1, (EXTRACT_SUBREG VR128:$src2, sub_sd))>;
+ def : Pat<(v4i32 (movlp VR128:$src1, VR128:$src2)),
+ (VMOVSDrr VR128:$src1, (EXTRACT_SUBREG VR128:$src2, sub_sd))>;
+
+ // Move scalar to XMM zero-extended, zeroing a VR128 then do a
+ // MOVS{S,D} to the lower bits.
+ def : Pat<(v4f32 (X86vzmovl (v4f32 (scalar_to_vector FR32:$src)))),
+ (VMOVSSrr (v4f32 (V_SET0)), FR32:$src)>;
+ def : Pat<(v4f32 (X86vzmovl (v4f32 VR128:$src))),
+ (VMOVSSrr (v4f32 (V_SET0)),
+ (f32 (EXTRACT_SUBREG (v4f32 VR128:$src), sub_ss)))>;
+ def : Pat<(v4i32 (X86vzmovl (v4i32 VR128:$src))),
+ (VMOVSSrr (v4i32 (V_SET0)),
+ (EXTRACT_SUBREG (v4i32 VR128:$src), sub_ss))>;
+ def : Pat<(v2f64 (X86vzmovl (v2f64 (scalar_to_vector FR64:$src)))),
+ (VMOVSDrr (v2f64 (V_SET0)), FR64:$src)>;
+ }
+
+ let AddedComplexity = 20 in {
+ // MOVSSrm zeros the high parts of the register; represent this
+ // with SUBREG_TO_REG. The AVX versions also write: DST[255:128] <- 0
+ def : Pat<(v4f32 (X86vzmovl (v4f32 (scalar_to_vector (loadf32 addr:$src))))),
+ (SUBREG_TO_REG (i32 0), (VMOVSSrm addr:$src), sub_ss)>;
+ def : Pat<(v4f32 (scalar_to_vector (loadf32 addr:$src))),
+ (SUBREG_TO_REG (i32 0), (VMOVSSrm addr:$src), sub_ss)>;
+ def : Pat<(v4f32 (X86vzmovl (loadv4f32 addr:$src))),
+ (SUBREG_TO_REG (i32 0), (VMOVSSrm addr:$src), sub_ss)>;
+
+ // MOVSDrm zeros the high parts of the register; represent this
+ // with SUBREG_TO_REG. The AVX versions also write: DST[255:128] <- 0
+ def : Pat<(v2f64 (X86vzmovl (v2f64 (scalar_to_vector (loadf64 addr:$src))))),
+ (SUBREG_TO_REG (i64 0), (VMOVSDrm addr:$src), sub_sd)>;
+ def : Pat<(v2f64 (scalar_to_vector (loadf64 addr:$src))),
+ (SUBREG_TO_REG (i64 0), (VMOVSDrm addr:$src), sub_sd)>;
+ def : Pat<(v2f64 (X86vzmovl (loadv2f64 addr:$src))),
+ (SUBREG_TO_REG (i64 0), (VMOVSDrm addr:$src), sub_sd)>;
+ def : Pat<(v2f64 (X86vzmovl (bc_v2f64 (loadv4f32 addr:$src)))),
+ (SUBREG_TO_REG (i64 0), (VMOVSDrm addr:$src), sub_sd)>;
+ def : Pat<(v2f64 (X86vzload addr:$src)),
+ (SUBREG_TO_REG (i64 0), (VMOVSDrm addr:$src), sub_sd)>;
+
+ // Represent the same patterns above but in the form they appear for
+ // 256-bit types
+ def : Pat<(v8f32 (X86vzmovl (insert_subvector undef,
+ (v4f32 (scalar_to_vector (loadf32 addr:$src))), (i32 0)))),
+ (SUBREG_TO_REG (i32 0), (VMOVSSrm addr:$src), sub_ss)>;
+ def : Pat<(v4f64 (X86vzmovl (insert_subvector undef,
+ (v2f64 (scalar_to_vector (loadf64 addr:$src))), (i32 0)))),
+ (SUBREG_TO_REG (i32 0), (VMOVSDrm addr:$src), sub_sd)>;
+ }
+ def : Pat<(v8f32 (X86vzmovl (insert_subvector undef,
+ (v4f32 (scalar_to_vector FR32:$src)), (i32 0)))),
+ (SUBREG_TO_REG (i32 0),
+ (v4f32 (VMOVSSrr (v4f32 (V_SET0)), FR32:$src)),
+ sub_xmm)>;
+ def : Pat<(v4f64 (X86vzmovl (insert_subvector undef,
+ (v2f64 (scalar_to_vector FR64:$src)), (i32 0)))),
+ (SUBREG_TO_REG (i64 0),
+ (v2f64 (VMOVSDrr (v2f64 (V_SET0)), FR64:$src)),
+ sub_xmm)>;
+
+ // Extract and store.
+ def : Pat<(store (f32 (vector_extract (v4f32 VR128:$src), (iPTR 0))),
+ addr:$dst),
+ (VMOVSSmr addr:$dst,
+ (EXTRACT_SUBREG (v4f32 VR128:$src), sub_ss))>;
+ def : Pat<(store (f64 (vector_extract (v2f64 VR128:$src), (iPTR 0))),
+ addr:$dst),
+ (VMOVSDmr addr:$dst,
+ (EXTRACT_SUBREG (v2f64 VR128:$src), sub_sd))>;
+
+ // Shuffle with VMOVSS
+ def : Pat<(v4f32 (X86Movss VR128:$src1, (scalar_to_vector FR32:$src2))),
+ (VMOVSSrr VR128:$src1, FR32:$src2)>;
+ def : Pat<(v4i32 (X86Movss VR128:$src1, VR128:$src2)),
+ (VMOVSSrr (v4i32 VR128:$src1),
+ (EXTRACT_SUBREG (v4i32 VR128:$src2), sub_ss))>;
+ def : Pat<(v4f32 (X86Movss VR128:$src1, VR128:$src2)),
+ (VMOVSSrr (v4f32 VR128:$src1),
+ (EXTRACT_SUBREG (v4f32 VR128:$src2), sub_ss))>;
+
+ // Shuffle with VMOVSD
+ def : Pat<(v2f64 (X86Movsd VR128:$src1, (scalar_to_vector FR64:$src2))),
+ (VMOVSDrr VR128:$src1, FR64:$src2)>;
+ def : Pat<(v2i64 (X86Movsd VR128:$src1, VR128:$src2)),
+ (VMOVSDrr (v2i64 VR128:$src1),
+ (EXTRACT_SUBREG (v2i64 VR128:$src2), sub_sd))>;
+ def : Pat<(v2f64 (X86Movsd VR128:$src1, VR128:$src2)),
+ (VMOVSDrr (v2f64 VR128:$src1),
+ (EXTRACT_SUBREG (v2f64 VR128:$src2), sub_sd))>;
+ def : Pat<(v4f32 (X86Movsd VR128:$src1, VR128:$src2)),
+ (VMOVSDrr VR128:$src1, (EXTRACT_SUBREG (v4f32 VR128:$src2),
+ sub_sd))>;
+ def : Pat<(v4i32 (X86Movsd VR128:$src1, VR128:$src2)),
+ (VMOVSDrr VR128:$src1, (EXTRACT_SUBREG (v4i32 VR128:$src2),
+ sub_sd))>;
+
+ // FIXME: Instead of a X86Movlps there should be a X86Movsd here, the problem
+ // is during lowering, where it's not possible to recognize the fold cause
+ // it has two uses through a bitcast. One use disappears at isel time and the
+ // fold opportunity reappears.
+ def : Pat<(v4f32 (X86Movlps VR128:$src1, VR128:$src2)),
+ (VMOVSDrr VR128:$src1, (EXTRACT_SUBREG (v4f32 VR128:$src2),
+ sub_sd))>;
+ def : Pat<(v4i32 (X86Movlps VR128:$src1, VR128:$src2)),
+ (VMOVSDrr VR128:$src1, (EXTRACT_SUBREG (v4i32 VR128:$src2),
+ sub_sd))>;
+}
+
+//===----------------------------------------------------------------------===//
+// SSE 1 & 2 - Move Aligned/Unaligned FP Instructions
+//===----------------------------------------------------------------------===//
-// Move Aligned/Unaligned floating point values
multiclass sse12_mov_packed<bits<8> opc, RegisterClass RC,
X86MemOperand x86memop, PatFrag ld_frag,
string asm, Domain d,
@@ -248,22 +672,22 @@ let canFoldAsLoad = 1, isReMaterializable = IsReMaterializable in
}
defm VMOVAPS : sse12_mov_packed<0x28, VR128, f128mem, alignedloadv4f32,
- "movaps", SSEPackedSingle>, VEX;
+ "movaps", SSEPackedSingle>, TB, VEX;
defm VMOVAPD : sse12_mov_packed<0x28, VR128, f128mem, alignedloadv2f64,
- "movapd", SSEPackedDouble>, OpSize, VEX;
+ "movapd", SSEPackedDouble>, TB, OpSize, VEX;
defm VMOVUPS : sse12_mov_packed<0x10, VR128, f128mem, loadv4f32,
- "movups", SSEPackedSingle>, VEX;
+ "movups", SSEPackedSingle>, TB, VEX;
defm VMOVUPD : sse12_mov_packed<0x10, VR128, f128mem, loadv2f64,
- "movupd", SSEPackedDouble, 0>, OpSize, VEX;
+ "movupd", SSEPackedDouble, 0>, TB, OpSize, VEX;
defm VMOVAPSY : sse12_mov_packed<0x28, VR256, f256mem, alignedloadv8f32,
- "movaps", SSEPackedSingle>, VEX;
+ "movaps", SSEPackedSingle>, TB, VEX;
defm VMOVAPDY : sse12_mov_packed<0x28, VR256, f256mem, alignedloadv4f64,
- "movapd", SSEPackedDouble>, OpSize, VEX;
+ "movapd", SSEPackedDouble>, TB, OpSize, VEX;
defm VMOVUPSY : sse12_mov_packed<0x10, VR256, f256mem, loadv8f32,
- "movups", SSEPackedSingle>, VEX;
+ "movups", SSEPackedSingle>, TB, VEX;
defm VMOVUPDY : sse12_mov_packed<0x10, VR256, f256mem, loadv4f64,
- "movupd", SSEPackedDouble, 0>, OpSize, VEX;
+ "movupd", SSEPackedDouble, 0>, TB, OpSize, VEX;
defm MOVAPS : sse12_mov_packed<0x28, VR128, f128mem, alignedloadv4f32,
"movaps", SSEPackedSingle>, TB;
defm MOVAPD : sse12_mov_packed<0x28, VR128, f128mem, alignedloadv2f64,
@@ -287,10 +711,10 @@ def VMOVUPDmr : VPDI<0x11, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src),
[(store (v2f64 VR128:$src), addr:$dst)]>, VEX;
def VMOVAPSYmr : VPSI<0x29, MRMDestMem, (outs), (ins f256mem:$dst, VR256:$src),
"movaps\t{$src, $dst|$dst, $src}",
- [(alignedstore (v8f32 VR256:$src), addr:$dst)]>, VEX;
+ [(alignedstore256 (v8f32 VR256:$src), addr:$dst)]>, VEX;
def VMOVAPDYmr : VPDI<0x29, MRMDestMem, (outs), (ins f256mem:$dst, VR256:$src),
"movapd\t{$src, $dst|$dst, $src}",
- [(alignedstore (v4f64 VR256:$src), addr:$dst)]>, VEX;
+ [(alignedstore256 (v4f64 VR256:$src), addr:$dst)]>, VEX;
def VMOVUPSYmr : VPSI<0x11, MRMDestMem, (outs), (ins f256mem:$dst, VR256:$src),
"movups\t{$src, $dst|$dst, $src}",
[(store (v8f32 VR256:$src), addr:$dst)]>, VEX;
@@ -298,6 +722,34 @@ def VMOVUPDYmr : VPDI<0x11, MRMDestMem, (outs), (ins f256mem:$dst, VR256:$src),
"movupd\t{$src, $dst|$dst, $src}",
[(store (v4f64 VR256:$src), addr:$dst)]>, VEX;
+// For disassembler
+let isCodeGenOnly = 1 in {
+ def VMOVAPSrr_REV : VPSI<0x29, MRMDestReg, (outs VR128:$dst),
+ (ins VR128:$src),
+ "movaps\t{$src, $dst|$dst, $src}", []>, VEX;
+ def VMOVAPDrr_REV : VPDI<0x29, MRMDestReg, (outs VR128:$dst),
+ (ins VR128:$src),
+ "movapd\t{$src, $dst|$dst, $src}", []>, VEX;
+ def VMOVUPSrr_REV : VPSI<0x11, MRMDestReg, (outs VR128:$dst),
+ (ins VR128:$src),
+ "movups\t{$src, $dst|$dst, $src}", []>, VEX;
+ def VMOVUPDrr_REV : VPDI<0x11, MRMDestReg, (outs VR128:$dst),
+ (ins VR128:$src),
+ "movupd\t{$src, $dst|$dst, $src}", []>, VEX;
+ def VMOVAPSYrr_REV : VPSI<0x29, MRMDestReg, (outs VR256:$dst),
+ (ins VR256:$src),
+ "movaps\t{$src, $dst|$dst, $src}", []>, VEX;
+ def VMOVAPDYrr_REV : VPDI<0x29, MRMDestReg, (outs VR256:$dst),
+ (ins VR256:$src),
+ "movapd\t{$src, $dst|$dst, $src}", []>, VEX;
+ def VMOVUPSYrr_REV : VPSI<0x11, MRMDestReg, (outs VR256:$dst),
+ (ins VR256:$src),
+ "movups\t{$src, $dst|$dst, $src}", []>, VEX;
+ def VMOVUPDYrr_REV : VPDI<0x11, MRMDestReg, (outs VR256:$dst),
+ (ins VR256:$src),
+ "movupd\t{$src, $dst|$dst, $src}", []>, VEX;
+}
+
def : Pat<(int_x86_avx_loadu_ps_256 addr:$src), (VMOVUPSYrm addr:$src)>;
def : Pat<(int_x86_avx_storeu_ps_256 addr:$dst, VR256:$src),
(VMOVUPSYmr addr:$dst, VR256:$src)>;
@@ -319,24 +771,155 @@ def MOVUPDmr : PDI<0x11, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src),
"movupd\t{$src, $dst|$dst, $src}",
[(store (v2f64 VR128:$src), addr:$dst)]>;
-// Intrinsic forms of MOVUPS/D load and store
-def VMOVUPSmr_Int : VPSI<0x11, MRMDestMem, (outs),
- (ins f128mem:$dst, VR128:$src),
- "movups\t{$src, $dst|$dst, $src}",
- [(int_x86_sse_storeu_ps addr:$dst, VR128:$src)]>, VEX;
-def VMOVUPDmr_Int : VPDI<0x11, MRMDestMem, (outs),
- (ins f128mem:$dst, VR128:$src),
- "movupd\t{$src, $dst|$dst, $src}",
- [(int_x86_sse2_storeu_pd addr:$dst, VR128:$src)]>, VEX;
-
-def MOVUPSmr_Int : PSI<0x11, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src),
- "movups\t{$src, $dst|$dst, $src}",
- [(int_x86_sse_storeu_ps addr:$dst, VR128:$src)]>;
-def MOVUPDmr_Int : PDI<0x11, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src),
- "movupd\t{$src, $dst|$dst, $src}",
- [(int_x86_sse2_storeu_pd addr:$dst, VR128:$src)]>;
-
-// Move Low/High packed floating point values
+// For disassembler
+let isCodeGenOnly = 1 in {
+ def MOVAPSrr_REV : PSI<0x29, MRMDestReg, (outs VR128:$dst), (ins VR128:$src),
+ "movaps\t{$src, $dst|$dst, $src}", []>;
+ def MOVAPDrr_REV : PDI<0x29, MRMDestReg, (outs VR128:$dst), (ins VR128:$src),
+ "movapd\t{$src, $dst|$dst, $src}", []>;
+ def MOVUPSrr_REV : PSI<0x11, MRMDestReg, (outs VR128:$dst), (ins VR128:$src),
+ "movups\t{$src, $dst|$dst, $src}", []>;
+ def MOVUPDrr_REV : PDI<0x11, MRMDestReg, (outs VR128:$dst), (ins VR128:$src),
+ "movupd\t{$src, $dst|$dst, $src}", []>;
+}
+
+let Predicates = [HasAVX] in {
+ def : Pat<(int_x86_sse_storeu_ps addr:$dst, VR128:$src),
+ (VMOVUPSmr addr:$dst, VR128:$src)>;
+ def : Pat<(int_x86_sse2_storeu_pd addr:$dst, VR128:$src),
+ (VMOVUPDmr addr:$dst, VR128:$src)>;
+}
+
+let Predicates = [HasSSE1] in
+ def : Pat<(int_x86_sse_storeu_ps addr:$dst, VR128:$src),
+ (MOVUPSmr addr:$dst, VR128:$src)>;
+let Predicates = [HasSSE2] in
+ def : Pat<(int_x86_sse2_storeu_pd addr:$dst, VR128:$src),
+ (MOVUPDmr addr:$dst, VR128:$src)>;
+
+// Use movaps / movups for SSE integer load / store (one byte shorter).
+// The instructions selected below are then converted to MOVDQA/MOVDQU
+// during the SSE domain pass.
+let Predicates = [HasSSE1] in {
+ def : Pat<(alignedloadv4i32 addr:$src),
+ (MOVAPSrm addr:$src)>;
+ def : Pat<(loadv4i32 addr:$src),
+ (MOVUPSrm addr:$src)>;
+ def : Pat<(alignedloadv2i64 addr:$src),
+ (MOVAPSrm addr:$src)>;
+ def : Pat<(loadv2i64 addr:$src),
+ (MOVUPSrm addr:$src)>;
+
+ def : Pat<(alignedstore (v2i64 VR128:$src), addr:$dst),
+ (MOVAPSmr addr:$dst, VR128:$src)>;
+ def : Pat<(alignedstore (v4i32 VR128:$src), addr:$dst),
+ (MOVAPSmr addr:$dst, VR128:$src)>;
+ def : Pat<(alignedstore (v8i16 VR128:$src), addr:$dst),
+ (MOVAPSmr addr:$dst, VR128:$src)>;
+ def : Pat<(alignedstore (v16i8 VR128:$src), addr:$dst),
+ (MOVAPSmr addr:$dst, VR128:$src)>;
+ def : Pat<(store (v2i64 VR128:$src), addr:$dst),
+ (MOVUPSmr addr:$dst, VR128:$src)>;
+ def : Pat<(store (v4i32 VR128:$src), addr:$dst),
+ (MOVUPSmr addr:$dst, VR128:$src)>;
+ def : Pat<(store (v8i16 VR128:$src), addr:$dst),
+ (MOVUPSmr addr:$dst, VR128:$src)>;
+ def : Pat<(store (v16i8 VR128:$src), addr:$dst),
+ (MOVUPSmr addr:$dst, VR128:$src)>;
+}
+
+// Use vmovaps/vmovups for AVX integer load/store.
+let Predicates = [HasAVX] in {
+ // 128-bit load/store
+ def : Pat<(alignedloadv4i32 addr:$src),
+ (VMOVAPSrm addr:$src)>;
+ def : Pat<(loadv4i32 addr:$src),
+ (VMOVUPSrm addr:$src)>;
+ def : Pat<(alignedloadv2i64 addr:$src),
+ (VMOVAPSrm addr:$src)>;
+ def : Pat<(loadv2i64 addr:$src),
+ (VMOVUPSrm addr:$src)>;
+
+ def : Pat<(alignedstore (v2i64 VR128:$src), addr:$dst),
+ (VMOVAPSmr addr:$dst, VR128:$src)>;
+ def : Pat<(alignedstore (v4i32 VR128:$src), addr:$dst),
+ (VMOVAPSmr addr:$dst, VR128:$src)>;
+ def : Pat<(alignedstore (v8i16 VR128:$src), addr:$dst),
+ (VMOVAPSmr addr:$dst, VR128:$src)>;
+ def : Pat<(alignedstore (v16i8 VR128:$src), addr:$dst),
+ (VMOVAPSmr addr:$dst, VR128:$src)>;
+ def : Pat<(store (v2i64 VR128:$src), addr:$dst),
+ (VMOVUPSmr addr:$dst, VR128:$src)>;
+ def : Pat<(store (v4i32 VR128:$src), addr:$dst),
+ (VMOVUPSmr addr:$dst, VR128:$src)>;
+ def : Pat<(store (v8i16 VR128:$src), addr:$dst),
+ (VMOVUPSmr addr:$dst, VR128:$src)>;
+ def : Pat<(store (v16i8 VR128:$src), addr:$dst),
+ (VMOVUPSmr addr:$dst, VR128:$src)>;
+
+ // 256-bit load/store
+ def : Pat<(alignedloadv4i64 addr:$src),
+ (VMOVAPSYrm addr:$src)>;
+ def : Pat<(loadv4i64 addr:$src),
+ (VMOVUPSYrm addr:$src)>;
+ def : Pat<(alignedloadv8i32 addr:$src),
+ (VMOVAPSYrm addr:$src)>;
+ def : Pat<(loadv8i32 addr:$src),
+ (VMOVUPSYrm addr:$src)>;
+ def : Pat<(alignedstore256 (v4i64 VR256:$src), addr:$dst),
+ (VMOVAPSYmr addr:$dst, VR256:$src)>;
+ def : Pat<(alignedstore256 (v8i32 VR256:$src), addr:$dst),
+ (VMOVAPSYmr addr:$dst, VR256:$src)>;
+ def : Pat<(alignedstore256 (v16i16 VR256:$src), addr:$dst),
+ (VMOVAPSYmr addr:$dst, VR256:$src)>;
+ def : Pat<(alignedstore256 (v32i8 VR256:$src), addr:$dst),
+ (VMOVAPSYmr addr:$dst, VR256:$src)>;
+ def : Pat<(store (v4i64 VR256:$src), addr:$dst),
+ (VMOVUPSYmr addr:$dst, VR256:$src)>;
+ def : Pat<(store (v8i32 VR256:$src), addr:$dst),
+ (VMOVUPSYmr addr:$dst, VR256:$src)>;
+ def : Pat<(store (v16i16 VR256:$src), addr:$dst),
+ (VMOVUPSYmr addr:$dst, VR256:$src)>;
+ def : Pat<(store (v32i8 VR256:$src), addr:$dst),
+ (VMOVUPSYmr addr:$dst, VR256:$src)>;
+}
+
+// Alias instruction to do FR32 or FR64 reg-to-reg copy using movaps. Upper
+// bits are disregarded. FIXME: Set encoding to pseudo!
+let neverHasSideEffects = 1 in {
+def FsMOVAPSrr : PSI<0x28, MRMSrcReg, (outs FR32:$dst), (ins FR32:$src),
+ "movaps\t{$src, $dst|$dst, $src}", []>;
+def FsMOVAPDrr : PDI<0x28, MRMSrcReg, (outs FR64:$dst), (ins FR64:$src),
+ "movapd\t{$src, $dst|$dst, $src}", []>;
+def FsVMOVAPSrr : VPSI<0x28, MRMSrcReg, (outs FR32:$dst), (ins FR32:$src),
+ "movaps\t{$src, $dst|$dst, $src}", []>, VEX;
+def FsVMOVAPDrr : VPDI<0x28, MRMSrcReg, (outs FR64:$dst), (ins FR64:$src),
+ "movapd\t{$src, $dst|$dst, $src}", []>, VEX;
+}
+
+// Alias instruction to load FR32 or FR64 from f128mem using movaps. Upper
+// bits are disregarded. FIXME: Set encoding to pseudo!
+let canFoldAsLoad = 1, isReMaterializable = 1 in {
+def FsMOVAPSrm : PSI<0x28, MRMSrcMem, (outs FR32:$dst), (ins f128mem:$src),
+ "movaps\t{$src, $dst|$dst, $src}",
+ [(set FR32:$dst, (alignedloadfsf32 addr:$src))]>;
+def FsMOVAPDrm : PDI<0x28, MRMSrcMem, (outs FR64:$dst), (ins f128mem:$src),
+ "movapd\t{$src, $dst|$dst, $src}",
+ [(set FR64:$dst, (alignedloadfsf64 addr:$src))]>;
+let isCodeGenOnly = 1 in {
+ def FsVMOVAPSrm : VPSI<0x28, MRMSrcMem, (outs FR32:$dst), (ins f128mem:$src),
+ "movaps\t{$src, $dst|$dst, $src}",
+ [(set FR32:$dst, (alignedloadfsf32 addr:$src))]>, VEX;
+ def FsVMOVAPDrm : VPDI<0x28, MRMSrcMem, (outs FR64:$dst), (ins f128mem:$src),
+ "movapd\t{$src, $dst|$dst, $src}",
+ [(set FR64:$dst, (alignedloadfsf64 addr:$src))]>, VEX;
+}
+}
+
+//===----------------------------------------------------------------------===//
+// SSE 1 & 2 - Move Low packed FP Instructions
+//===----------------------------------------------------------------------===//
+
multiclass sse12_mov_hilo_packed<bits<8>opc, RegisterClass RC,
PatFrag mov_frag, string base_opc,
string asm_opr> {
@@ -359,14 +942,10 @@ multiclass sse12_mov_hilo_packed<bits<8>opc, RegisterClass RC,
let AddedComplexity = 20 in {
defm VMOVL : sse12_mov_hilo_packed<0x12, VR128, movlp, "movlp",
"\t{$src2, $src1, $dst|$dst, $src1, $src2}">, VEX_4V;
- defm VMOVH : sse12_mov_hilo_packed<0x16, VR128, movlhps, "movhp",
- "\t{$src2, $src1, $dst|$dst, $src1, $src2}">, VEX_4V;
}
let Constraints = "$src1 = $dst", AddedComplexity = 20 in {
defm MOVL : sse12_mov_hilo_packed<0x12, VR128, movlp, "movlp",
"\t{$src2, $dst|$dst, $src2}">;
- defm MOVH : sse12_mov_hilo_packed<0x16, VR128, movlhps, "movhp",
- "\t{$src2, $dst|$dst, $src2}">;
}
def VMOVLPSmr : VPSI<0x13, MRMDestMem, (outs), (ins f64mem:$dst, VR128:$src),
@@ -386,6 +965,147 @@ def MOVLPDmr : PDI<0x13, MRMDestMem, (outs), (ins f64mem:$dst, VR128:$src),
[(store (f64 (vector_extract (v2f64 VR128:$src),
(iPTR 0))), addr:$dst)]>;
+let Predicates = [HasAVX] in {
+ let AddedComplexity = 20 in {
+ // vector_shuffle v1, (load v2) <4, 5, 2, 3> using MOVLPS
+ def : Pat<(v4f32 (movlp VR128:$src1, (load addr:$src2))),
+ (VMOVLPSrm VR128:$src1, addr:$src2)>;
+ def : Pat<(v4i32 (movlp VR128:$src1, (load addr:$src2))),
+ (VMOVLPSrm VR128:$src1, addr:$src2)>;
+ // vector_shuffle v1, (load v2) <2, 1> using MOVLPS
+ def : Pat<(v2f64 (movlp VR128:$src1, (load addr:$src2))),
+ (VMOVLPDrm VR128:$src1, addr:$src2)>;
+ def : Pat<(v2i64 (movlp VR128:$src1, (load addr:$src2))),
+ (VMOVLPDrm VR128:$src1, addr:$src2)>;
+ }
+
+ // (store (vector_shuffle (load addr), v2, <4, 5, 2, 3>), addr) using MOVLPS
+ def : Pat<(store (v4f32 (movlp (load addr:$src1), VR128:$src2)), addr:$src1),
+ (VMOVLPSmr addr:$src1, VR128:$src2)>;
+ def : Pat<(store (v4i32 (movlp (bc_v4i32 (loadv2i64 addr:$src1)),
+ VR128:$src2)), addr:$src1),
+ (VMOVLPSmr addr:$src1, VR128:$src2)>;
+
+ // (store (vector_shuffle (load addr), v2, <2, 1>), addr) using MOVLPS
+ def : Pat<(store (v2f64 (movlp (load addr:$src1), VR128:$src2)), addr:$src1),
+ (VMOVLPDmr addr:$src1, VR128:$src2)>;
+ def : Pat<(store (v2i64 (movlp (load addr:$src1), VR128:$src2)), addr:$src1),
+ (VMOVLPDmr addr:$src1, VR128:$src2)>;
+
+ // Shuffle with VMOVLPS
+ def : Pat<(v4f32 (X86Movlps VR128:$src1, (load addr:$src2))),
+ (VMOVLPSrm VR128:$src1, addr:$src2)>;
+ def : Pat<(v4i32 (X86Movlps VR128:$src1, (load addr:$src2))),
+ (VMOVLPSrm VR128:$src1, addr:$src2)>;
+ def : Pat<(X86Movlps VR128:$src1,
+ (bc_v4f32 (v2f64 (scalar_to_vector (loadf64 addr:$src2))))),
+ (VMOVLPSrm VR128:$src1, addr:$src2)>;
+
+ // Shuffle with VMOVLPD
+ def : Pat<(v2f64 (X86Movlpd VR128:$src1, (load addr:$src2))),
+ (VMOVLPDrm VR128:$src1, addr:$src2)>;
+ def : Pat<(v2i64 (X86Movlpd VR128:$src1, (load addr:$src2))),
+ (VMOVLPDrm VR128:$src1, addr:$src2)>;
+ def : Pat<(v2f64 (X86Movlpd VR128:$src1,
+ (scalar_to_vector (loadf64 addr:$src2)))),
+ (VMOVLPDrm VR128:$src1, addr:$src2)>;
+
+ // Store patterns
+ def : Pat<(store (v4f32 (X86Movlps (load addr:$src1), VR128:$src2)),
+ addr:$src1),
+ (VMOVLPSmr addr:$src1, VR128:$src2)>;
+ def : Pat<(store (v4i32 (X86Movlps
+ (bc_v4i32 (loadv2i64 addr:$src1)), VR128:$src2)), addr:$src1),
+ (VMOVLPSmr addr:$src1, VR128:$src2)>;
+ def : Pat<(store (v2f64 (X86Movlpd (load addr:$src1), VR128:$src2)),
+ addr:$src1),
+ (VMOVLPDmr addr:$src1, VR128:$src2)>;
+ def : Pat<(store (v2i64 (X86Movlpd (load addr:$src1), VR128:$src2)),
+ addr:$src1),
+ (VMOVLPDmr addr:$src1, VR128:$src2)>;
+}
+
+let Predicates = [HasSSE1] in {
+ let AddedComplexity = 20 in {
+ // vector_shuffle v1, (load v2) <4, 5, 2, 3> using MOVLPS
+ def : Pat<(v4f32 (movlp VR128:$src1, (load addr:$src2))),
+ (MOVLPSrm VR128:$src1, addr:$src2)>;
+ def : Pat<(v4i32 (movlp VR128:$src1, (load addr:$src2))),
+ (MOVLPSrm VR128:$src1, addr:$src2)>;
+ }
+
+ // (store (vector_shuffle (load addr), v2, <4, 5, 2, 3>), addr) using MOVLPS
+ def : Pat<(store (v4f32 (movlp (load addr:$src1), VR128:$src2)), addr:$src1),
+ (MOVLPSmr addr:$src1, VR128:$src2)>;
+ def : Pat<(store (v4i32 (movlp (bc_v4i32 (loadv2i64 addr:$src1)),
+ VR128:$src2)), addr:$src1),
+ (MOVLPSmr addr:$src1, VR128:$src2)>;
+
+ // Shuffle with MOVLPS
+ def : Pat<(v4f32 (X86Movlps VR128:$src1, (load addr:$src2))),
+ (MOVLPSrm VR128:$src1, addr:$src2)>;
+ def : Pat<(v4i32 (X86Movlps VR128:$src1, (load addr:$src2))),
+ (MOVLPSrm VR128:$src1, addr:$src2)>;
+ def : Pat<(X86Movlps VR128:$src1,
+ (bc_v4f32 (v2f64 (scalar_to_vector (loadf64 addr:$src2))))),
+ (MOVLPSrm VR128:$src1, addr:$src2)>;
+
+ // Store patterns
+ def : Pat<(store (v4f32 (X86Movlps (load addr:$src1), VR128:$src2)),
+ addr:$src1),
+ (MOVLPSmr addr:$src1, VR128:$src2)>;
+ def : Pat<(store (v4i32 (X86Movlps
+ (bc_v4i32 (loadv2i64 addr:$src1)), VR128:$src2)),
+ addr:$src1),
+ (MOVLPSmr addr:$src1, VR128:$src2)>;
+}
+
+let Predicates = [HasSSE2] in {
+ let AddedComplexity = 20 in {
+ // vector_shuffle v1, (load v2) <2, 1> using MOVLPS
+ def : Pat<(v2f64 (movlp VR128:$src1, (load addr:$src2))),
+ (MOVLPDrm VR128:$src1, addr:$src2)>;
+ def : Pat<(v2i64 (movlp VR128:$src1, (load addr:$src2))),
+ (MOVLPDrm VR128:$src1, addr:$src2)>;
+ }
+
+ // (store (vector_shuffle (load addr), v2, <2, 1>), addr) using MOVLPS
+ def : Pat<(store (v2f64 (movlp (load addr:$src1), VR128:$src2)), addr:$src1),
+ (MOVLPDmr addr:$src1, VR128:$src2)>;
+ def : Pat<(store (v2i64 (movlp (load addr:$src1), VR128:$src2)), addr:$src1),
+ (MOVLPDmr addr:$src1, VR128:$src2)>;
+
+ // Shuffle with MOVLPD
+ def : Pat<(v2f64 (X86Movlpd VR128:$src1, (load addr:$src2))),
+ (MOVLPDrm VR128:$src1, addr:$src2)>;
+ def : Pat<(v2i64 (X86Movlpd VR128:$src1, (load addr:$src2))),
+ (MOVLPDrm VR128:$src1, addr:$src2)>;
+ def : Pat<(v2f64 (X86Movlpd VR128:$src1,
+ (scalar_to_vector (loadf64 addr:$src2)))),
+ (MOVLPDrm VR128:$src1, addr:$src2)>;
+
+ // Store patterns
+ def : Pat<(store (v2f64 (X86Movlpd (load addr:$src1), VR128:$src2)),
+ addr:$src1),
+ (MOVLPDmr addr:$src1, VR128:$src2)>;
+ def : Pat<(store (v2i64 (X86Movlpd (load addr:$src1), VR128:$src2)),
+ addr:$src1),
+ (MOVLPDmr addr:$src1, VR128:$src2)>;
+}
+
+//===----------------------------------------------------------------------===//
+// SSE 1 & 2 - Move Hi packed FP Instructions
+//===----------------------------------------------------------------------===//
+
+let AddedComplexity = 20 in {
+ defm VMOVH : sse12_mov_hilo_packed<0x16, VR128, movlhps, "movhp",
+ "\t{$src2, $src1, $dst|$dst, $src1, $src2}">, VEX_4V;
+}
+let Constraints = "$src1 = $dst", AddedComplexity = 20 in {
+ defm MOVH : sse12_mov_hilo_packed<0x16, VR128, movlhps, "movhp",
+ "\t{$src2, $dst|$dst, $src2}">;
+}
+
// v2f64 extract element 1 is always custom lowered to unpack high to low
// and extract element 0 so the non-store version isn't too horrible.
def VMOVHPSmr : VPSI<0x17, MRMDestMem, (outs), (ins f64mem:$dst, VR128:$src),
@@ -411,6 +1131,80 @@ def MOVHPDmr : PDI<0x17, MRMDestMem, (outs), (ins f64mem:$dst, VR128:$src),
(v2f64 (unpckh VR128:$src, (undef))),
(iPTR 0))), addr:$dst)]>;
+let Predicates = [HasAVX] in {
+ // VMOVHPS patterns
+ def : Pat<(movlhps VR128:$src1, (bc_v4i32 (v2i64 (X86vzload addr:$src2)))),
+ (VMOVHPSrm (v4i32 VR128:$src1), addr:$src2)>;
+ def : Pat<(X86Movlhps VR128:$src1,
+ (bc_v4f32 (v2f64 (scalar_to_vector (loadf64 addr:$src2))))),
+ (VMOVHPSrm VR128:$src1, addr:$src2)>;
+ def : Pat<(X86Movlhps VR128:$src1,
+ (bc_v4i32 (v2i64 (X86vzload addr:$src2)))),
+ (VMOVHPSrm VR128:$src1, addr:$src2)>;
+
+ // FIXME: Instead of X86Unpcklpd, there should be a X86Movlhpd here, the problem
+ // is during lowering, where it's not possible to recognize the load fold cause
+ // it has two uses through a bitcast. One use disappears at isel time and the
+ // fold opportunity reappears.
+ def : Pat<(v2f64 (X86Unpcklpd VR128:$src1,
+ (scalar_to_vector (loadf64 addr:$src2)))),
+ (VMOVHPDrm VR128:$src1, addr:$src2)>;
+
+ // FIXME: This should be matched by a X86Movhpd instead. Same as above
+ def : Pat<(v2f64 (X86Movlhpd VR128:$src1,
+ (scalar_to_vector (loadf64 addr:$src2)))),
+ (VMOVHPDrm VR128:$src1, addr:$src2)>;
+
+ // Store patterns
+ def : Pat<(store (f64 (vector_extract
+ (v2f64 (X86Unpckhps VR128:$src, (undef))), (iPTR 0))), addr:$dst),
+ (VMOVHPSmr addr:$dst, VR128:$src)>;
+ def : Pat<(store (f64 (vector_extract
+ (v2f64 (X86Unpckhpd VR128:$src, (undef))), (iPTR 0))), addr:$dst),
+ (VMOVHPDmr addr:$dst, VR128:$src)>;
+}
+
+let Predicates = [HasSSE1] in {
+ // MOVHPS patterns
+ def : Pat<(movlhps VR128:$src1, (bc_v4i32 (v2i64 (X86vzload addr:$src2)))),
+ (MOVHPSrm (v4i32 VR128:$src1), addr:$src2)>;
+ def : Pat<(X86Movlhps VR128:$src1,
+ (bc_v4f32 (v2f64 (scalar_to_vector (loadf64 addr:$src2))))),
+ (MOVHPSrm VR128:$src1, addr:$src2)>;
+ def : Pat<(X86Movlhps VR128:$src1,
+ (bc_v4f32 (v2i64 (X86vzload addr:$src2)))),
+ (MOVHPSrm VR128:$src1, addr:$src2)>;
+
+ // Store patterns
+ def : Pat<(store (f64 (vector_extract
+ (v2f64 (X86Unpckhps VR128:$src, (undef))), (iPTR 0))), addr:$dst),
+ (MOVHPSmr addr:$dst, VR128:$src)>;
+}
+
+let Predicates = [HasSSE2] in {
+ // FIXME: Instead of X86Unpcklpd, there should be a X86Movlhpd here, the problem
+ // is during lowering, where it's not possible to recognize the load fold cause
+ // it has two uses through a bitcast. One use disappears at isel time and the
+ // fold opportunity reappears.
+ def : Pat<(v2f64 (X86Unpcklpd VR128:$src1,
+ (scalar_to_vector (loadf64 addr:$src2)))),
+ (MOVHPDrm VR128:$src1, addr:$src2)>;
+
+ // FIXME: This should be matched by a X86Movhpd instead. Same as above
+ def : Pat<(v2f64 (X86Movlhpd VR128:$src1,
+ (scalar_to_vector (loadf64 addr:$src2)))),
+ (MOVHPDrm VR128:$src1, addr:$src2)>;
+
+ // Store patterns
+ def : Pat<(store (f64 (vector_extract
+ (v2f64 (X86Unpckhpd VR128:$src, (undef))), (iPTR 0))),addr:$dst),
+ (MOVHPDmr addr:$dst, VR128:$src)>;
+}
+
+//===----------------------------------------------------------------------===//
+// SSE 1 & 2 - Move Low to High and High to Low packed FP Instructions
+//===----------------------------------------------------------------------===//
+
let AddedComplexity = 20 in {
def VMOVLHPSrr : VPSI<0x16, MRMSrcReg, (outs VR128:$dst),
(ins VR128:$src1, VR128:$src2),
@@ -438,13 +1232,80 @@ let Constraints = "$src1 = $dst", AddedComplexity = 20 in {
(v4f32 (movhlps VR128:$src1, VR128:$src2)))]>;
}
-def : Pat<(movlhps VR128:$src1, (bc_v4i32 (v2i64 (X86vzload addr:$src2)))),
- (MOVHPSrm (v4i32 VR128:$src1), addr:$src2)>;
-let AddedComplexity = 20 in {
- def : Pat<(v4f32 (movddup VR128:$src, (undef))),
- (MOVLHPSrr (v4f32 VR128:$src), (v4f32 VR128:$src))>;
- def : Pat<(v2i64 (movddup VR128:$src, (undef))),
- (MOVLHPSrr (v2i64 VR128:$src), (v2i64 VR128:$src))>;
+let Predicates = [HasAVX] in {
+ // MOVLHPS patterns
+ let AddedComplexity = 20 in {
+ def : Pat<(v4f32 (movddup VR128:$src, (undef))),
+ (VMOVLHPSrr (v4f32 VR128:$src), (v4f32 VR128:$src))>;
+ def : Pat<(v2i64 (movddup VR128:$src, (undef))),
+ (VMOVLHPSrr (v2i64 VR128:$src), (v2i64 VR128:$src))>;
+
+ // vector_shuffle v1, v2 <0, 1, 4, 5> using MOVLHPS
+ def : Pat<(v4i32 (movlhps VR128:$src1, VR128:$src2)),
+ (VMOVLHPSrr VR128:$src1, VR128:$src2)>;
+ }
+ def : Pat<(v4f32 (X86Movlhps VR128:$src1, VR128:$src2)),
+ (VMOVLHPSrr VR128:$src1, VR128:$src2)>;
+ def : Pat<(v4i32 (X86Movlhps VR128:$src1, VR128:$src2)),
+ (VMOVLHPSrr VR128:$src1, VR128:$src2)>;
+ def : Pat<(v2i64 (X86Movlhps VR128:$src1, VR128:$src2)),
+ (VMOVLHPSrr (v2i64 VR128:$src1), VR128:$src2)>;
+
+ // MOVHLPS patterns
+ let AddedComplexity = 20 in {
+ // vector_shuffle v1, v2 <6, 7, 2, 3> using MOVHLPS
+ def : Pat<(v4i32 (movhlps VR128:$src1, VR128:$src2)),
+ (VMOVHLPSrr VR128:$src1, VR128:$src2)>;
+
+ // vector_shuffle v1, undef <2, ?, ?, ?> using MOVHLPS
+ def : Pat<(v4f32 (movhlps_undef VR128:$src1, (undef))),
+ (VMOVHLPSrr VR128:$src1, VR128:$src1)>;
+ def : Pat<(v4i32 (movhlps_undef VR128:$src1, (undef))),
+ (VMOVHLPSrr VR128:$src1, VR128:$src1)>;
+ }
+
+ def : Pat<(v4f32 (X86Movhlps VR128:$src1, VR128:$src2)),
+ (VMOVHLPSrr VR128:$src1, VR128:$src2)>;
+ def : Pat<(v4i32 (X86Movhlps VR128:$src1, VR128:$src2)),
+ (VMOVHLPSrr VR128:$src1, VR128:$src2)>;
+}
+
+let Predicates = [HasSSE1] in {
+ // MOVLHPS patterns
+ let AddedComplexity = 20 in {
+ def : Pat<(v4f32 (movddup VR128:$src, (undef))),
+ (MOVLHPSrr (v4f32 VR128:$src), (v4f32 VR128:$src))>;
+ def : Pat<(v2i64 (movddup VR128:$src, (undef))),
+ (MOVLHPSrr (v2i64 VR128:$src), (v2i64 VR128:$src))>;
+
+ // vector_shuffle v1, v2 <0, 1, 4, 5> using MOVLHPS
+ def : Pat<(v4i32 (movlhps VR128:$src1, VR128:$src2)),
+ (MOVLHPSrr VR128:$src1, VR128:$src2)>;
+ }
+ def : Pat<(v4f32 (X86Movlhps VR128:$src1, VR128:$src2)),
+ (MOVLHPSrr VR128:$src1, VR128:$src2)>;
+ def : Pat<(v4i32 (X86Movlhps VR128:$src1, VR128:$src2)),
+ (MOVLHPSrr VR128:$src1, VR128:$src2)>;
+ def : Pat<(v2i64 (X86Movlhps VR128:$src1, VR128:$src2)),
+ (MOVLHPSrr (v2i64 VR128:$src1), VR128:$src2)>;
+
+ // MOVHLPS patterns
+ let AddedComplexity = 20 in {
+ // vector_shuffle v1, v2 <6, 7, 2, 3> using MOVHLPS
+ def : Pat<(v4i32 (movhlps VR128:$src1, VR128:$src2)),
+ (MOVHLPSrr VR128:$src1, VR128:$src2)>;
+
+ // vector_shuffle v1, undef <2, ?, ?, ?> using MOVHLPS
+ def : Pat<(v4f32 (movhlps_undef VR128:$src1, (undef))),
+ (MOVHLPSrr VR128:$src1, VR128:$src1)>;
+ def : Pat<(v4i32 (movhlps_undef VR128:$src1, (undef))),
+ (MOVHLPSrr VR128:$src1, VR128:$src1)>;
+ }
+
+ def : Pat<(v4f32 (X86Movhlps VR128:$src1, VR128:$src2)),
+ (MOVHLPSrr VR128:$src1, VR128:$src2)>;
+ def : Pat<(v4i32 (X86Movhlps VR128:$src1, VR128:$src2)),
+ (MOVHLPSrr VR128:$src1, VR128:$src2)>;
}
//===----------------------------------------------------------------------===//
@@ -462,10 +1323,9 @@ multiclass sse12_cvt_s<bits<8> opc, RegisterClass SrcRC, RegisterClass DstRC,
multiclass sse12_cvt_s_np<bits<8> opc, RegisterClass SrcRC, RegisterClass DstRC,
X86MemOperand x86memop, string asm> {
- def rr : SI<opc, MRMSrcReg, (outs DstRC:$dst), (ins SrcRC:$src), asm,
- []>;
- def rm : SI<opc, MRMSrcMem, (outs DstRC:$dst), (ins x86memop:$src), asm,
- []>;
+ def rr : SI<opc, MRMSrcReg, (outs DstRC:$dst), (ins SrcRC:$src), asm, []>;
+ let mayLoad = 1 in
+ def rm : SI<opc, MRMSrcMem, (outs DstRC:$dst), (ins x86memop:$src), asm, []>;
}
multiclass sse12_cvt_p<bits<8> opc, RegisterClass SrcRC, RegisterClass DstRC,
@@ -481,36 +1341,39 @@ multiclass sse12_vcvt_avx<bits<8> opc, RegisterClass SrcRC, RegisterClass DstRC,
X86MemOperand x86memop, string asm> {
def rr : SI<opc, MRMSrcReg, (outs DstRC:$dst), (ins DstRC:$src1, SrcRC:$src),
!strconcat(asm,"\t{$src, $src1, $dst|$dst, $src1, $src}"), []>;
+ let mayLoad = 1 in
def rm : SI<opc, MRMSrcMem, (outs DstRC:$dst),
(ins DstRC:$src1, x86memop:$src),
!strconcat(asm,"\t{$src, $src1, $dst|$dst, $src1, $src}"), []>;
}
defm VCVTTSS2SI : sse12_cvt_s<0x2C, FR32, GR32, fp_to_sint, f32mem, loadf32,
- "cvttss2si\t{$src, $dst|$dst, $src}">, XS, VEX;
+ "cvttss2si\t{$src, $dst|$dst, $src}">, XS, VEX,
+ VEX_LIG;
defm VCVTTSS2SI64 : sse12_cvt_s<0x2C, FR32, GR64, fp_to_sint, f32mem, loadf32,
"cvttss2si\t{$src, $dst|$dst, $src}">, XS, VEX,
- VEX_W;
+ VEX_W, VEX_LIG;
defm VCVTTSD2SI : sse12_cvt_s<0x2C, FR64, GR32, fp_to_sint, f64mem, loadf64,
- "cvttsd2si\t{$src, $dst|$dst, $src}">, XD, VEX;
+ "cvttsd2si\t{$src, $dst|$dst, $src}">, XD, VEX,
+ VEX_LIG;
defm VCVTTSD2SI64 : sse12_cvt_s<0x2C, FR64, GR64, fp_to_sint, f64mem, loadf64,
"cvttsd2si\t{$src, $dst|$dst, $src}">, XD,
- VEX, VEX_W;
+ VEX, VEX_W, VEX_LIG;
// The assembler can recognize rr 64-bit instructions by seeing a rxx
// register, but the same isn't true when only using memory operands,
// provide other assembly "l" and "q" forms to address this explicitly
// where appropriate to do so.
defm VCVTSI2SS : sse12_vcvt_avx<0x2A, GR32, FR32, i32mem, "cvtsi2ss">, XS,
- VEX_4V;
+ VEX_4V, VEX_LIG;
defm VCVTSI2SS64 : sse12_vcvt_avx<0x2A, GR64, FR32, i64mem, "cvtsi2ss{q}">, XS,
- VEX_4V, VEX_W;
+ VEX_4V, VEX_W, VEX_LIG;
defm VCVTSI2SD : sse12_vcvt_avx<0x2A, GR32, FR64, i32mem, "cvtsi2sd">, XD,
- VEX_4V;
+ VEX_4V, VEX_LIG;
defm VCVTSI2SDL : sse12_vcvt_avx<0x2A, GR32, FR64, i32mem, "cvtsi2sd{l}">, XD,
- VEX_4V;
+ VEX_4V, VEX_LIG;
defm VCVTSI2SD64 : sse12_vcvt_avx<0x2A, GR64, FR64, i64mem, "cvtsi2sd{q}">, XD,
- VEX_4V, VEX_W;
+ VEX_4V, VEX_W, VEX_LIG;
let Predicates = [HasAVX] in {
def : Pat<(f32 (sint_to_fp (loadi32 addr:$src))),
@@ -579,11 +1442,6 @@ multiclass sse12_cvt_sint_3addr<bits<8> opc, RegisterClass SrcRC,
[(set DstRC:$dst, (Int DstRC:$src1, (ld_frag addr:$src2)))]>;
}
-defm Int_VCVTSS2SI : sse12_cvt_sint<0x2D, VR128, GR32, int_x86_sse_cvtss2si,
- f32mem, load, "cvtss2si">, XS, VEX;
-defm Int_VCVTSS2SI64 : sse12_cvt_sint<0x2D, VR128, GR64,
- int_x86_sse_cvtss2si64, f32mem, load, "cvtss2si">,
- XS, VEX, VEX_W;
defm Int_VCVTSD2SI : sse12_cvt_sint<0x2D, VR128, GR32, int_x86_sse2_cvtsd2si,
f128mem, load, "cvtsd2si">, XD, VEX;
defm Int_VCVTSD2SI64 : sse12_cvt_sint<0x2D, VR128, GR64,
@@ -594,14 +1452,12 @@ defm Int_VCVTSD2SI64 : sse12_cvt_sint<0x2D, VR128, GR64,
// Get rid of this hack or rename the intrinsics, there are several
// intructions that only match with the intrinsic form, why create duplicates
// to let them be recognized by the assembler?
-defm VCVTSD2SI_alt : sse12_cvt_s_np<0x2D, FR64, GR32, f64mem,
- "cvtsd2si\t{$src, $dst|$dst, $src}">, XD, VEX;
+defm VCVTSD2SI : sse12_cvt_s_np<0x2D, FR64, GR32, f64mem,
+ "cvtsd2si\t{$src, $dst|$dst, $src}">, XD, VEX, VEX_LIG;
defm VCVTSD2SI64 : sse12_cvt_s_np<0x2D, FR64, GR64, f64mem,
- "cvtsd2si\t{$src, $dst|$dst, $src}">, XD, VEX, VEX_W;
-defm Int_CVTSS2SI : sse12_cvt_sint<0x2D, VR128, GR32, int_x86_sse_cvtss2si,
- f32mem, load, "cvtss2si">, XS;
-defm Int_CVTSS2SI64 : sse12_cvt_sint<0x2D, VR128, GR64, int_x86_sse_cvtss2si64,
- f32mem, load, "cvtss2si{q}">, XS, REX_W;
+ "cvtsd2si\t{$src, $dst|$dst, $src}">, XD, VEX, VEX_W,
+ VEX_LIG;
+
defm CVTSD2SI : sse12_cvt_sint<0x2D, VR128, GR32, int_x86_sse2_cvtsd2si,
f128mem, load, "cvtsd2si{l}">, XD;
defm CVTSD2SI64 : sse12_cvt_sint<0x2D, VR128, GR64, int_x86_sse2_cvtsd2si64,
@@ -660,10 +1516,11 @@ defm Int_CVTTSD2SI64 : sse12_cvt_sint<0x2C, VR128, GR64,
let Pattern = []<dag> in {
defm VCVTSS2SI : sse12_cvt_s<0x2D, FR32, GR32, undef, f32mem, load,
- "cvtss2si{l}\t{$src, $dst|$dst, $src}">, XS, VEX;
+ "cvtss2si{l}\t{$src, $dst|$dst, $src}">, XS,
+ VEX, VEX_LIG;
defm VCVTSS2SI64 : sse12_cvt_s<0x2D, FR32, GR64, undef, f32mem, load,
"cvtss2si\t{$src, $dst|$dst, $src}">, XS, VEX,
- VEX_W;
+ VEX_W, VEX_LIG;
defm VCVTDQ2PS : sse12_cvt_p<0x5B, VR128, VR128, undef, i128mem, load,
"cvtdq2ps\t{$src, $dst|$dst, $src}",
SSEPackedSingle>, TB, VEX;
@@ -671,6 +1528,7 @@ defm VCVTDQ2PSY : sse12_cvt_p<0x5B, VR256, VR256, undef, i256mem, load,
"cvtdq2ps\t{$src, $dst|$dst, $src}",
SSEPackedSingle>, TB, VEX;
}
+
let Pattern = []<dag> in {
defm CVTSS2SI : sse12_cvt_s<0x2D, FR32, GR32, undef, f32mem, load /*dummy*/,
"cvtss2si{l}\t{$src, $dst|$dst, $src}">, XS;
@@ -681,19 +1539,43 @@ defm CVTDQ2PS : sse12_cvt_p<0x5B, VR128, VR128, undef, i128mem, load /*dummy*/,
SSEPackedSingle>, TB; /* PD SSE3 form is avaiable */
}
+let Predicates = [HasSSE1] in {
+ def : Pat<(int_x86_sse_cvtss2si VR128:$src),
+ (CVTSS2SIrr (EXTRACT_SUBREG (v4f32 VR128:$src), sub_ss))>;
+ def : Pat<(int_x86_sse_cvtss2si (load addr:$src)),
+ (CVTSS2SIrm addr:$src)>;
+ def : Pat<(int_x86_sse_cvtss2si64 VR128:$src),
+ (CVTSS2SI64rr (EXTRACT_SUBREG (v4f32 VR128:$src), sub_ss))>;
+ def : Pat<(int_x86_sse_cvtss2si64 (load addr:$src)),
+ (CVTSS2SI64rm addr:$src)>;
+}
+
+let Predicates = [HasAVX] in {
+ def : Pat<(int_x86_sse_cvtss2si VR128:$src),
+ (VCVTSS2SIrr (EXTRACT_SUBREG (v4f32 VR128:$src), sub_ss))>;
+ def : Pat<(int_x86_sse_cvtss2si (load addr:$src)),
+ (VCVTSS2SIrm addr:$src)>;
+ def : Pat<(int_x86_sse_cvtss2si64 VR128:$src),
+ (VCVTSS2SI64rr (EXTRACT_SUBREG (v4f32 VR128:$src), sub_ss))>;
+ def : Pat<(int_x86_sse_cvtss2si64 (load addr:$src)),
+ (VCVTSS2SI64rm addr:$src)>;
+}
+
/// SSE 2 Only
// Convert scalar double to scalar single
def VCVTSD2SSrr : VSDI<0x5A, MRMSrcReg, (outs FR32:$dst),
(ins FR64:$src1, FR64:$src2),
"cvtsd2ss\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>,
- VEX_4V;
+ VEX_4V, VEX_LIG;
+let mayLoad = 1 in
def VCVTSD2SSrm : I<0x5A, MRMSrcMem, (outs FR32:$dst),
(ins FR64:$src1, f64mem:$src2),
"vcvtsd2ss\t{$src2, $src1, $dst|$dst, $src1, $src2}",
- []>, XD, Requires<[HasAVX, OptForSize]>, VEX_4V;
+ []>, XD, Requires<[HasAVX, OptForSize]>, VEX_4V, VEX_LIG;
+
def : Pat<(f32 (fround FR64:$src)), (VCVTSD2SSrr FR64:$src, FR64:$src)>,
- Requires<[HasAVX]>;
+ Requires<[HasAVX]>;
def CVTSD2SSrr : SDI<0x5A, MRMSrcReg, (outs FR32:$dst), (ins FR64:$src),
"cvtsd2ss\t{$src, $dst|$dst, $src}",
@@ -715,13 +1597,25 @@ defm Int_CVTSD2SS: sse12_cvt_sint_3addr<0x5A, VR128, VR128,
def VCVTSS2SDrr : I<0x5A, MRMSrcReg, (outs FR64:$dst),
(ins FR32:$src1, FR32:$src2),
"vcvtss2sd\t{$src2, $src1, $dst|$dst, $src1, $src2}",
- []>, XS, Requires<[HasAVX]>, VEX_4V;
+ []>, XS, Requires<[HasAVX]>, VEX_4V, VEX_LIG;
+let mayLoad = 1 in
def VCVTSS2SDrm : I<0x5A, MRMSrcMem, (outs FR64:$dst),
(ins FR32:$src1, f32mem:$src2),
"vcvtss2sd\t{$src2, $src1, $dst|$dst, $src1, $src2}",
- []>, XS, VEX_4V, Requires<[HasAVX, OptForSize]>;
-def : Pat<(f64 (fextend FR32:$src)), (VCVTSS2SDrr FR32:$src, FR32:$src)>,
- Requires<[HasAVX]>;
+ []>, XS, VEX_4V, VEX_LIG, Requires<[HasAVX, OptForSize]>;
+
+let Predicates = [HasAVX] in {
+ def : Pat<(f64 (fextend FR32:$src)),
+ (VCVTSS2SDrr FR32:$src, FR32:$src)>;
+ def : Pat<(fextend (loadf32 addr:$src)),
+ (VCVTSS2SDrm (f32 (IMPLICIT_DEF)), addr:$src)>;
+ def : Pat<(extloadf32 addr:$src),
+ (VCVTSS2SDrm (f32 (IMPLICIT_DEF)), addr:$src)>;
+}
+
+def : Pat<(extloadf32 addr:$src),
+ (VCVTSS2SDrr (f32 (IMPLICIT_DEF)), (MOVSSrm addr:$src))>,
+ Requires<[HasAVX, OptForSpeed]>;
def CVTSS2SDrr : I<0x5A, MRMSrcReg, (outs FR64:$dst), (ins FR32:$src),
"cvtss2sd\t{$src, $dst|$dst, $src}",
@@ -732,6 +1626,16 @@ def CVTSS2SDrm : I<0x5A, MRMSrcMem, (outs FR64:$dst), (ins f32mem:$src),
[(set FR64:$dst, (extloadf32 addr:$src))]>, XS,
Requires<[HasSSE2, OptForSize]>;
+// extload f32 -> f64. This matches load+fextend because we have a hack in
+// the isel (PreprocessForFPConvert) that can introduce loads after dag
+// combine.
+// Since these loads aren't folded into the fextend, we have to match it
+// explicitly here.
+def : Pat<(fextend (loadf32 addr:$src)),
+ (CVTSS2SDrm addr:$src)>, Requires<[HasSSE2]>;
+def : Pat<(extloadf32 addr:$src),
+ (CVTSS2SDrr (MOVSSrm addr:$src))>, Requires<[HasSSE2, OptForSpeed]>;
+
def Int_VCVTSS2SDrr: I<0x5A, MRMSrcReg,
(outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
"vcvtss2sd\t{$src2, $src1, $dst|$dst, $src1, $src2}",
@@ -759,10 +1663,6 @@ def Int_CVTSS2SDrm: I<0x5A, MRMSrcMem,
Requires<[HasSSE2]>;
}
-def : Pat<(extloadf32 addr:$src),
- (CVTSS2SDrr (MOVSSrm addr:$src))>,
- Requires<[HasSSE2, OptForSpeed]>;
-
// Convert doubleword to packed single/double fp
// SSE2 instructions without OpSize prefix
def Int_VCVTDQ2PSrr : I<0x5B, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
@@ -862,10 +1762,12 @@ def Int_CVTPD2DQrm : I<0xE6, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
// SSE2 packed instructions with XS prefix
def VCVTTPS2DQrr : VSSI<0x5B, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
"cvttps2dq\t{$src, $dst|$dst, $src}", []>, VEX;
+let mayLoad = 1 in
def VCVTTPS2DQrm : VSSI<0x5B, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
"cvttps2dq\t{$src, $dst|$dst, $src}", []>, VEX;
def VCVTTPS2DQYrr : VSSI<0x5B, MRMSrcReg, (outs VR256:$dst), (ins VR256:$src),
"cvttps2dq\t{$src, $dst|$dst, $src}", []>, VEX;
+let mayLoad = 1 in
def VCVTTPS2DQYrm : VSSI<0x5B, MRMSrcMem, (outs VR256:$dst), (ins f256mem:$src),
"cvttps2dq\t{$src, $dst|$dst, $src}", []>, VEX;
def CVTTPS2DQrr : SSI<0x5B, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
@@ -877,7 +1779,6 @@ def CVTTPS2DQrm : SSI<0x5B, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
[(set VR128:$dst,
(int_x86_sse2_cvttps2dq (memop addr:$src)))]>;
-
def Int_VCVTTPS2DQrr : I<0x5B, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
"vcvttps2dq\t{$src, $dst|$dst, $src}",
[(set VR128:$dst,
@@ -889,16 +1790,33 @@ def Int_VCVTTPS2DQrm : I<0x5B, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
(memop addr:$src)))]>,
XS, VEX, Requires<[HasAVX]>;
-def Int_VCVTTPD2DQrr : VPDI<0xE6, MRMSrcReg, (outs VR128:$dst),
- (ins VR128:$src),
- "cvttpd2dq\t{$src, $dst|$dst, $src}",
- [(set VR128:$dst, (int_x86_sse2_cvttpd2dq VR128:$src))]>,
- VEX;
-def Int_VCVTTPD2DQrm : VPDI<0xE6, MRMSrcMem, (outs VR128:$dst),
- (ins f128mem:$src),
- "cvttpd2dq\t{$src, $dst|$dst, $src}",
- [(set VR128:$dst, (int_x86_sse2_cvttpd2dq
- (memop addr:$src)))]>, VEX;
+let Predicates = [HasSSE2] in {
+ def : Pat<(v4f32 (sint_to_fp (v4i32 VR128:$src))),
+ (Int_CVTDQ2PSrr VR128:$src)>;
+ def : Pat<(v4i32 (fp_to_sint (v4f32 VR128:$src))),
+ (CVTTPS2DQrr VR128:$src)>;
+}
+
+let Predicates = [HasAVX] in {
+ def : Pat<(v4f32 (sint_to_fp (v4i32 VR128:$src))),
+ (Int_VCVTDQ2PSrr VR128:$src)>;
+ def : Pat<(v4i32 (fp_to_sint (v4f32 VR128:$src))),
+ (VCVTTPS2DQrr VR128:$src)>;
+ def : Pat<(v8f32 (sint_to_fp (v8i32 VR256:$src))),
+ (VCVTDQ2PSYrr VR256:$src)>;
+ def : Pat<(v8i32 (fp_to_sint (v8f32 VR256:$src))),
+ (VCVTTPS2DQYrr VR256:$src)>;
+}
+
+def VCVTTPD2DQrr : VPDI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
+ "cvttpd2dq\t{$src, $dst|$dst, $src}",
+ [(set VR128:$dst,
+ (int_x86_sse2_cvttpd2dq VR128:$src))]>, VEX;
+let isCodeGenOnly = 1 in
+def VCVTTPD2DQrm : VPDI<0xE6, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
+ "cvttpd2dq\t{$src, $dst|$dst, $src}",
+ [(set VR128:$dst, (int_x86_sse2_cvttpd2dq
+ (memop addr:$src)))]>, VEX;
def CVTTPD2DQrr : PDI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
"cvttpd2dq\t{$src, $dst|$dst, $src}",
[(set VR128:$dst, (int_x86_sse2_cvttpd2dq VR128:$src))]>;
@@ -910,8 +1828,6 @@ def CVTTPD2DQrm : PDI<0xE6, MRMSrcMem, (outs VR128:$dst),(ins f128mem:$src),
// The assembler can recognize rr 256-bit instructions by seeing a ymm
// register, but the same isn't true when using memory operands instead.
// Provide other assembly rr and rm forms to address this explicitly.
-def VCVTTPD2DQrr : VPDI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
- "cvttpd2dq\t{$src, $dst|$dst, $src}", []>, VEX;
def VCVTTPD2DQXrYr : VPDI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR256:$src),
"cvttpd2dq\t{$src, $dst|$dst, $src}", []>, VEX;
@@ -931,13 +1847,13 @@ def VCVTTPD2DQYrm : VPDI<0xE6, MRMSrcMem, (outs VR128:$dst), (ins f256mem:$src),
let Predicates = [HasAVX] in {
// SSE2 instructions without OpSize prefix
def VCVTPS2PDrr : I<0x5A, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
- "vcvtps2pd\t{$src, $dst|$dst, $src}", []>, VEX;
+ "vcvtps2pd\t{$src, $dst|$dst, $src}", []>, TB, VEX;
def VCVTPS2PDrm : I<0x5A, MRMSrcMem, (outs VR128:$dst), (ins f64mem:$src),
- "vcvtps2pd\t{$src, $dst|$dst, $src}", []>, VEX;
+ "vcvtps2pd\t{$src, $dst|$dst, $src}", []>, TB, VEX;
def VCVTPS2PDYrr : I<0x5A, MRMSrcReg, (outs VR256:$dst), (ins VR128:$src),
- "vcvtps2pd\t{$src, $dst|$dst, $src}", []>, VEX;
+ "vcvtps2pd\t{$src, $dst|$dst, $src}", []>, TB, VEX;
def VCVTPS2PDYrm : I<0x5A, MRMSrcMem, (outs VR256:$dst), (ins f128mem:$src),
- "vcvtps2pd\t{$src, $dst|$dst, $src}", []>, VEX;
+ "vcvtps2pd\t{$src, $dst|$dst, $src}", []>, TB, VEX;
}
def CVTPS2PDrr : I<0x5A, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
"cvtps2pd\t{$src, $dst|$dst, $src}", []>, TB;
@@ -947,12 +1863,12 @@ def CVTPS2PDrm : I<0x5A, MRMSrcMem, (outs VR128:$dst), (ins f64mem:$src),
def Int_VCVTPS2PDrr : I<0x5A, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
"vcvtps2pd\t{$src, $dst|$dst, $src}",
[(set VR128:$dst, (int_x86_sse2_cvtps2pd VR128:$src))]>,
- VEX, Requires<[HasAVX]>;
+ TB, VEX, Requires<[HasAVX]>;
def Int_VCVTPS2PDrm : I<0x5A, MRMSrcMem, (outs VR128:$dst), (ins f64mem:$src),
"vcvtps2pd\t{$src, $dst|$dst, $src}",
[(set VR128:$dst, (int_x86_sse2_cvtps2pd
(load addr:$src)))]>,
- VEX, Requires<[HasAVX]>;
+ TB, VEX, Requires<[HasAVX]>;
def Int_CVTPS2PDrr : I<0x5A, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
"cvtps2pd\t{$src, $dst|$dst, $src}",
[(set VR128:$dst, (int_x86_sse2_cvtps2pd VR128:$src))]>,
@@ -1038,75 +1954,61 @@ def : Pat<(int_x86_avx_cvtt_ps2dq_256 VR256:$src),
def : Pat<(int_x86_avx_cvtt_ps2dq_256 (memopv8f32 addr:$src)),
(VCVTTPS2DQYrm addr:$src)>;
+// Match fround and fextend for 128/256-bit conversions
+def : Pat<(v4f32 (fround (v4f64 VR256:$src))),
+ (VCVTPD2PSYrr VR256:$src)>;
+def : Pat<(v4f32 (fround (loadv4f64 addr:$src))),
+ (VCVTPD2PSYrm addr:$src)>;
+
+def : Pat<(v4f64 (fextend (v4f32 VR128:$src))),
+ (VCVTPS2PDYrr VR128:$src)>;
+def : Pat<(v4f64 (fextend (loadv4f32 addr:$src))),
+ (VCVTPS2PDYrm addr:$src)>;
+
//===----------------------------------------------------------------------===//
// SSE 1 & 2 - Compare Instructions
//===----------------------------------------------------------------------===//
// sse12_cmp_scalar - sse 1 & 2 compare scalar instructions
multiclass sse12_cmp_scalar<RegisterClass RC, X86MemOperand x86memop,
+ SDNode OpNode, ValueType VT, PatFrag ld_frag,
string asm, string asm_alt> {
- let isAsmParserOnly = 1 in {
- def rr : SIi8<0xC2, MRMSrcReg,
- (outs RC:$dst), (ins RC:$src1, RC:$src, SSECC:$cc),
- asm, []>;
- let mayLoad = 1 in
- def rm : SIi8<0xC2, MRMSrcMem,
- (outs RC:$dst), (ins RC:$src1, x86memop:$src, SSECC:$cc),
- asm, []>;
- }
+ def rr : SIi8<0xC2, MRMSrcReg,
+ (outs RC:$dst), (ins RC:$src1, RC:$src2, SSECC:$cc), asm,
+ [(set RC:$dst, (OpNode (VT RC:$src1), RC:$src2, imm:$cc))]>;
+ def rm : SIi8<0xC2, MRMSrcMem,
+ (outs RC:$dst), (ins RC:$src1, x86memop:$src2, SSECC:$cc), asm,
+ [(set RC:$dst, (OpNode (VT RC:$src1),
+ (ld_frag addr:$src2), imm:$cc))]>;
// Accept explicit immediate argument form instead of comparison code.
- def rr_alt : SIi8<0xC2, MRMSrcReg,
- (outs RC:$dst), (ins RC:$src1, RC:$src, i8imm:$src2),
- asm_alt, []>;
- let mayLoad = 1 in
- def rm_alt : SIi8<0xC2, MRMSrcMem,
- (outs RC:$dst), (ins RC:$src1, x86memop:$src, i8imm:$src2),
- asm_alt, []>;
+ let neverHasSideEffects = 1 in {
+ def rr_alt : SIi8<0xC2, MRMSrcReg, (outs RC:$dst),
+ (ins RC:$src1, RC:$src2, i8imm:$cc), asm_alt, []>;
+ let mayLoad = 1 in
+ def rm_alt : SIi8<0xC2, MRMSrcMem, (outs RC:$dst),
+ (ins RC:$src1, x86memop:$src2, i8imm:$cc), asm_alt, []>;
+ }
}
-let neverHasSideEffects = 1 in {
- defm VCMPSS : sse12_cmp_scalar<FR32, f32mem,
- "cmp${cc}ss\t{$src, $src1, $dst|$dst, $src1, $src}",
- "cmpss\t{$src2, $src, $src1, $dst|$dst, $src1, $src, $src2}">,
- XS, VEX_4V;
- defm VCMPSD : sse12_cmp_scalar<FR64, f64mem,
- "cmp${cc}sd\t{$src, $src1, $dst|$dst, $src1, $src}",
- "cmpsd\t{$src2, $src, $src1, $dst|$dst, $src1, $src, $src2}">,
- XD, VEX_4V;
-}
+defm VCMPSS : sse12_cmp_scalar<FR32, f32mem, X86cmpss, f32, loadf32,
+ "cmp${cc}ss\t{$src2, $src1, $dst|$dst, $src1, $src2}",
+ "cmpss\t{$cc, $src2, $src1, $dst|$dst, $src1, $src2, $cc}">,
+ XS, VEX_4V, VEX_LIG;
+defm VCMPSD : sse12_cmp_scalar<FR64, f64mem, X86cmpsd, f64, loadf64,
+ "cmp${cc}sd\t{$src2, $src1, $dst|$dst, $src1, $src2}",
+ "cmpsd\t{$cc, $src2, $src1, $dst|$dst, $src1, $src2, $cc}">,
+ XD, VEX_4V, VEX_LIG;
let Constraints = "$src1 = $dst" in {
-def CMPSSrr : SIi8<0xC2, MRMSrcReg,
- (outs FR32:$dst), (ins FR32:$src1, FR32:$src2, SSECC:$cc),
+ defm CMPSS : sse12_cmp_scalar<FR32, f32mem, X86cmpss, f32, loadf32,
"cmp${cc}ss\t{$src2, $dst|$dst, $src2}",
- [(set FR32:$dst, (X86cmpss (f32 FR32:$src1), FR32:$src2, imm:$cc))]>, XS;
-def CMPSSrm : SIi8<0xC2, MRMSrcMem,
- (outs FR32:$dst), (ins FR32:$src1, f32mem:$src2, SSECC:$cc),
- "cmp${cc}ss\t{$src2, $dst|$dst, $src2}",
- [(set FR32:$dst, (X86cmpss (f32 FR32:$src1), (loadf32 addr:$src2), imm:$cc))]>, XS;
-def CMPSDrr : SIi8<0xC2, MRMSrcReg,
- (outs FR64:$dst), (ins FR64:$src1, FR64:$src2, SSECC:$cc),
- "cmp${cc}sd\t{$src2, $dst|$dst, $src2}",
- [(set FR64:$dst, (X86cmpsd (f64 FR64:$src1), FR64:$src2, imm:$cc))]>, XD;
-def CMPSDrm : SIi8<0xC2, MRMSrcMem,
- (outs FR64:$dst), (ins FR64:$src1, f64mem:$src2, SSECC:$cc),
+ "cmpss\t{$cc, $src2, $dst|$dst, $src2, $cc}">,
+ XS;
+ defm CMPSD : sse12_cmp_scalar<FR64, f64mem, X86cmpsd, f64, loadf64,
"cmp${cc}sd\t{$src2, $dst|$dst, $src2}",
- [(set FR64:$dst, (X86cmpsd (f64 FR64:$src1), (loadf64 addr:$src2), imm:$cc))]>, XD;
-}
-let Constraints = "$src1 = $dst", neverHasSideEffects = 1 in {
-def CMPSSrr_alt : SIi8<0xC2, MRMSrcReg,
- (outs FR32:$dst), (ins FR32:$src1, FR32:$src, i8imm:$src2),
- "cmpss\t{$src2, $src, $dst|$dst, $src, $src2}", []>, XS;
-def CMPSSrm_alt : SIi8<0xC2, MRMSrcMem,
- (outs FR32:$dst), (ins FR32:$src1, f32mem:$src, i8imm:$src2),
- "cmpss\t{$src2, $src, $dst|$dst, $src, $src2}", []>, XS;
-def CMPSDrr_alt : SIi8<0xC2, MRMSrcReg,
- (outs FR64:$dst), (ins FR64:$src1, FR64:$src, i8imm:$src2),
- "cmpsd\t{$src2, $src, $dst|$dst, $src, $src2}", []>, XD;
-def CMPSDrm_alt : SIi8<0xC2, MRMSrcMem,
- (outs FR64:$dst), (ins FR64:$src1, f64mem:$src, i8imm:$src2),
- "cmpsd\t{$src2, $src, $dst|$dst, $src, $src2}", []>, XD;
+ "cmpsd\t{$cc, $src2, $dst|$dst, $src2, $cc}">,
+ XD;
}
multiclass sse12_cmp_scalar_int<RegisterClass RC, X86MemOperand x86memop,
@@ -1151,25 +2053,28 @@ multiclass sse12_ord_cmp<bits<8> opc, RegisterClass RC, SDNode OpNode,
let Defs = [EFLAGS] in {
defm VUCOMISS : sse12_ord_cmp<0x2E, FR32, X86cmp, f32, f32mem, loadf32,
- "ucomiss", SSEPackedSingle>, VEX;
+ "ucomiss", SSEPackedSingle>, TB, VEX, VEX_LIG;
defm VUCOMISD : sse12_ord_cmp<0x2E, FR64, X86cmp, f64, f64mem, loadf64,
- "ucomisd", SSEPackedDouble>, OpSize, VEX;
+ "ucomisd", SSEPackedDouble>, TB, OpSize, VEX,
+ VEX_LIG;
let Pattern = []<dag> in {
defm VCOMISS : sse12_ord_cmp<0x2F, VR128, undef, v4f32, f128mem, load,
- "comiss", SSEPackedSingle>, VEX;
+ "comiss", SSEPackedSingle>, TB, VEX,
+ VEX_LIG;
defm VCOMISD : sse12_ord_cmp<0x2F, VR128, undef, v2f64, f128mem, load,
- "comisd", SSEPackedDouble>, OpSize, VEX;
+ "comisd", SSEPackedDouble>, TB, OpSize, VEX,
+ VEX_LIG;
}
defm Int_VUCOMISS : sse12_ord_cmp<0x2E, VR128, X86ucomi, v4f32, f128mem,
- load, "ucomiss", SSEPackedSingle>, VEX;
+ load, "ucomiss", SSEPackedSingle>, TB, VEX;
defm Int_VUCOMISD : sse12_ord_cmp<0x2E, VR128, X86ucomi, v2f64, f128mem,
- load, "ucomisd", SSEPackedDouble>, OpSize, VEX;
+ load, "ucomisd", SSEPackedDouble>, TB, OpSize, VEX;
defm Int_VCOMISS : sse12_ord_cmp<0x2F, VR128, X86comi, v4f32, f128mem,
- load, "comiss", SSEPackedSingle>, VEX;
+ load, "comiss", SSEPackedSingle>, TB, VEX;
defm Int_VCOMISD : sse12_ord_cmp<0x2F, VR128, X86comi, v2f64, f128mem,
- load, "comisd", SSEPackedDouble>, OpSize, VEX;
+ load, "comisd", SSEPackedDouble>, TB, OpSize, VEX;
defm UCOMISS : sse12_ord_cmp<0x2E, FR32, X86cmp, f32, f32mem, loadf32,
"ucomiss", SSEPackedSingle>, TB;
defm UCOMISD : sse12_ord_cmp<0x2E, FR64, X86cmp, f64, f64mem, loadf64,
@@ -1199,57 +2104,82 @@ multiclass sse12_cmp_packed<RegisterClass RC, X86MemOperand x86memop,
Domain d> {
let isAsmParserOnly = 1 in {
def rri : PIi8<0xC2, MRMSrcReg,
- (outs RC:$dst), (ins RC:$src1, RC:$src, SSECC:$cc), asm,
- [(set RC:$dst, (Int RC:$src1, RC:$src, imm:$cc))], d>;
+ (outs RC:$dst), (ins RC:$src1, RC:$src2, SSECC:$cc), asm,
+ [(set RC:$dst, (Int RC:$src1, RC:$src2, imm:$cc))], d>;
def rmi : PIi8<0xC2, MRMSrcMem,
- (outs RC:$dst), (ins RC:$src1, f128mem:$src, SSECC:$cc), asm,
- [(set RC:$dst, (Int RC:$src1, (memop addr:$src), imm:$cc))], d>;
+ (outs RC:$dst), (ins RC:$src1, f128mem:$src2, SSECC:$cc), asm,
+ [(set RC:$dst, (Int RC:$src1, (memop addr:$src2), imm:$cc))], d>;
}
// Accept explicit immediate argument form instead of comparison code.
def rri_alt : PIi8<0xC2, MRMSrcReg,
- (outs RC:$dst), (ins RC:$src1, RC:$src, i8imm:$src2),
+ (outs RC:$dst), (ins RC:$src1, RC:$src2, i8imm:$cc),
asm_alt, [], d>;
def rmi_alt : PIi8<0xC2, MRMSrcMem,
- (outs RC:$dst), (ins RC:$src1, f128mem:$src, i8imm:$src2),
+ (outs RC:$dst), (ins RC:$src1, f128mem:$src2, i8imm:$cc),
asm_alt, [], d>;
}
defm VCMPPS : sse12_cmp_packed<VR128, f128mem, int_x86_sse_cmp_ps,
- "cmp${cc}ps\t{$src, $src1, $dst|$dst, $src1, $src}",
- "cmpps\t{$src2, $src, $src1, $dst|$dst, $src1, $src, $src2}",
- SSEPackedSingle>, VEX_4V;
+ "cmp${cc}ps\t{$src2, $src1, $dst|$dst, $src1, $src2}",
+ "cmpps\t{$cc, $src2, $src1, $dst|$dst, $src1, $src2, $cc}",
+ SSEPackedSingle>, TB, VEX_4V;
defm VCMPPD : sse12_cmp_packed<VR128, f128mem, int_x86_sse2_cmp_pd,
- "cmp${cc}pd\t{$src, $src1, $dst|$dst, $src1, $src}",
- "cmppd\t{$src2, $src, $src1, $dst|$dst, $src1, $src, $src2}",
- SSEPackedDouble>, OpSize, VEX_4V;
+ "cmp${cc}pd\t{$src2, $src1, $dst|$dst, $src1, $src2}",
+ "cmppd\t{$cc, $src2, $src1, $dst|$dst, $src1, $src2, $cc}",
+ SSEPackedDouble>, TB, OpSize, VEX_4V;
defm VCMPPSY : sse12_cmp_packed<VR256, f256mem, int_x86_avx_cmp_ps_256,
- "cmp${cc}ps\t{$src, $src1, $dst|$dst, $src1, $src}",
- "cmpps\t{$src2, $src, $src1, $dst|$dst, $src1, $src, $src2}",
- SSEPackedSingle>, VEX_4V;
+ "cmp${cc}ps\t{$src2, $src1, $dst|$dst, $src1, $src2}",
+ "cmpps\t{$cc, $src2, $src1, $dst|$dst, $src1, $src2, $cc}",
+ SSEPackedSingle>, TB, VEX_4V;
defm VCMPPDY : sse12_cmp_packed<VR256, f256mem, int_x86_avx_cmp_pd_256,
- "cmp${cc}pd\t{$src, $src1, $dst|$dst, $src1, $src}",
- "cmppd\t{$src2, $src, $src1, $dst|$dst, $src1, $src, $src2}",
- SSEPackedDouble>, OpSize, VEX_4V;
+ "cmp${cc}pd\t{$src2, $src1, $dst|$dst, $src1, $src2}",
+ "cmppd\t{$cc, $src2, $src1, $dst|$dst, $src1, $src2, $cc}",
+ SSEPackedDouble>, TB, OpSize, VEX_4V;
let Constraints = "$src1 = $dst" in {
defm CMPPS : sse12_cmp_packed<VR128, f128mem, int_x86_sse_cmp_ps,
- "cmp${cc}ps\t{$src, $dst|$dst, $src}",
- "cmpps\t{$src2, $src, $dst|$dst, $src, $src2}",
+ "cmp${cc}ps\t{$src2, $dst|$dst, $src2}",
+ "cmpps\t{$cc, $src2, $dst|$dst, $src2, $cc}",
SSEPackedSingle>, TB;
defm CMPPD : sse12_cmp_packed<VR128, f128mem, int_x86_sse2_cmp_pd,
- "cmp${cc}pd\t{$src, $dst|$dst, $src}",
- "cmppd\t{$src2, $src, $dst|$dst, $src, $src2}",
+ "cmp${cc}pd\t{$src2, $dst|$dst, $src2}",
+ "cmppd\t{$cc, $src2, $dst|$dst, $src2, $cc}",
SSEPackedDouble>, TB, OpSize;
}
+let Predicates = [HasSSE1] in {
def : Pat<(v4i32 (X86cmpps (v4f32 VR128:$src1), VR128:$src2, imm:$cc)),
(CMPPSrri (v4f32 VR128:$src1), (v4f32 VR128:$src2), imm:$cc)>;
def : Pat<(v4i32 (X86cmpps (v4f32 VR128:$src1), (memop addr:$src2), imm:$cc)),
(CMPPSrmi (v4f32 VR128:$src1), addr:$src2, imm:$cc)>;
+}
+
+let Predicates = [HasSSE2] in {
def : Pat<(v2i64 (X86cmppd (v2f64 VR128:$src1), VR128:$src2, imm:$cc)),
(CMPPDrri VR128:$src1, VR128:$src2, imm:$cc)>;
def : Pat<(v2i64 (X86cmppd (v2f64 VR128:$src1), (memop addr:$src2), imm:$cc)),
(CMPPDrmi VR128:$src1, addr:$src2, imm:$cc)>;
+}
+
+let Predicates = [HasAVX] in {
+def : Pat<(v4i32 (X86cmpps (v4f32 VR128:$src1), VR128:$src2, imm:$cc)),
+ (VCMPPSrri (v4f32 VR128:$src1), (v4f32 VR128:$src2), imm:$cc)>;
+def : Pat<(v4i32 (X86cmpps (v4f32 VR128:$src1), (memop addr:$src2), imm:$cc)),
+ (VCMPPSrmi (v4f32 VR128:$src1), addr:$src2, imm:$cc)>;
+def : Pat<(v2i64 (X86cmppd (v2f64 VR128:$src1), VR128:$src2, imm:$cc)),
+ (VCMPPDrri VR128:$src1, VR128:$src2, imm:$cc)>;
+def : Pat<(v2i64 (X86cmppd (v2f64 VR128:$src1), (memop addr:$src2), imm:$cc)),
+ (VCMPPDrmi VR128:$src1, addr:$src2, imm:$cc)>;
+
+def : Pat<(v8i32 (X86cmpps (v8f32 VR256:$src1), VR256:$src2, imm:$cc)),
+ (VCMPPSYrri (v8f32 VR256:$src1), (v8f32 VR256:$src2), imm:$cc)>;
+def : Pat<(v8i32 (X86cmpps (v8f32 VR256:$src1), (memop addr:$src2), imm:$cc)),
+ (VCMPPSYrmi (v8f32 VR256:$src1), addr:$src2, imm:$cc)>;
+def : Pat<(v4i64 (X86cmppd (v4f64 VR256:$src1), VR256:$src2, imm:$cc)),
+ (VCMPPDYrri VR256:$src1, VR256:$src2, imm:$cc)>;
+def : Pat<(v4i64 (X86cmppd (v4f64 VR256:$src1), (memop addr:$src2), imm:$cc)),
+ (VCMPPDYrmi VR256:$src1, addr:$src2, imm:$cc)>;
+}
//===----------------------------------------------------------------------===//
// SSE 1 & 2 - Shuffle Instructions
@@ -1293,6 +2223,132 @@ let Constraints = "$src1 = $dst" in {
memopv2f64, SSEPackedDouble>, TB, OpSize;
}
+let Predicates = [HasSSE1] in {
+ def : Pat<(v4f32 (X86Shufps VR128:$src1,
+ (memopv4f32 addr:$src2), (i8 imm:$imm))),
+ (SHUFPSrmi VR128:$src1, addr:$src2, imm:$imm)>;
+ def : Pat<(v4f32 (X86Shufps VR128:$src1, VR128:$src2, (i8 imm:$imm))),
+ (SHUFPSrri VR128:$src1, VR128:$src2, imm:$imm)>;
+ def : Pat<(v4i32 (X86Shufps VR128:$src1,
+ (bc_v4i32 (memopv2i64 addr:$src2)), (i8 imm:$imm))),
+ (SHUFPSrmi VR128:$src1, addr:$src2, imm:$imm)>;
+ def : Pat<(v4i32 (X86Shufps VR128:$src1, VR128:$src2, (i8 imm:$imm))),
+ (SHUFPSrri VR128:$src1, VR128:$src2, imm:$imm)>;
+ // vector_shuffle v1, v2 <4, 5, 2, 3> using SHUFPSrri (we prefer movsd, but
+ // fall back to this for SSE1)
+ def : Pat<(v4f32 (movlp:$src3 VR128:$src1, (v4f32 VR128:$src2))),
+ (SHUFPSrri VR128:$src2, VR128:$src1,
+ (SHUFFLE_get_shuf_imm VR128:$src3))>;
+ // Special unary SHUFPSrri case.
+ def : Pat<(v4f32 (pshufd:$src3 VR128:$src1, (undef))),
+ (SHUFPSrri VR128:$src1, VR128:$src1,
+ (SHUFFLE_get_shuf_imm VR128:$src3))>;
+}
+
+let Predicates = [HasSSE2] in {
+ // Special binary v4i32 shuffle cases with SHUFPS.
+ def : Pat<(v4i32 (shufp:$src3 VR128:$src1, (v4i32 VR128:$src2))),
+ (SHUFPSrri VR128:$src1, VR128:$src2,
+ (SHUFFLE_get_shuf_imm VR128:$src3))>;
+ def : Pat<(v4i32 (shufp:$src3 VR128:$src1,
+ (bc_v4i32 (memopv2i64 addr:$src2)))),
+ (SHUFPSrmi VR128:$src1, addr:$src2,
+ (SHUFFLE_get_shuf_imm VR128:$src3))>;
+ // Special unary SHUFPDrri cases.
+ def : Pat<(v2i64 (pshufd:$src3 VR128:$src1, (undef))),
+ (SHUFPDrri VR128:$src1, VR128:$src1,
+ (SHUFFLE_get_shuf_imm VR128:$src3))>;
+ def : Pat<(v2f64 (pshufd:$src3 VR128:$src1, (undef))),
+ (SHUFPDrri VR128:$src1, VR128:$src1,
+ (SHUFFLE_get_shuf_imm VR128:$src3))>;
+ // Special binary v2i64 shuffle cases using SHUFPDrri.
+ def : Pat<(v2i64 (shufp:$src3 VR128:$src1, VR128:$src2)),
+ (SHUFPDrri VR128:$src1, VR128:$src2,
+ (SHUFFLE_get_shuf_imm VR128:$src3))>;
+ // Generic SHUFPD patterns
+ def : Pat<(v2f64 (X86Shufps VR128:$src1,
+ (memopv2f64 addr:$src2), (i8 imm:$imm))),
+ (SHUFPDrmi VR128:$src1, addr:$src2, imm:$imm)>;
+ def : Pat<(v2i64 (X86Shufpd VR128:$src1, VR128:$src2, (i8 imm:$imm))),
+ (SHUFPDrri VR128:$src1, VR128:$src2, imm:$imm)>;
+ def : Pat<(v2f64 (X86Shufpd VR128:$src1, VR128:$src2, (i8 imm:$imm))),
+ (SHUFPDrri VR128:$src1, VR128:$src2, imm:$imm)>;
+}
+
+let Predicates = [HasAVX] in {
+ def : Pat<(v4f32 (X86Shufps VR128:$src1,
+ (memopv4f32 addr:$src2), (i8 imm:$imm))),
+ (VSHUFPSrmi VR128:$src1, addr:$src2, imm:$imm)>;
+ def : Pat<(v4f32 (X86Shufps VR128:$src1, VR128:$src2, (i8 imm:$imm))),
+ (VSHUFPSrri VR128:$src1, VR128:$src2, imm:$imm)>;
+ def : Pat<(v4i32 (X86Shufps VR128:$src1,
+ (bc_v4i32 (memopv2i64 addr:$src2)), (i8 imm:$imm))),
+ (VSHUFPSrmi VR128:$src1, addr:$src2, imm:$imm)>;
+ def : Pat<(v4i32 (X86Shufps VR128:$src1, VR128:$src2, (i8 imm:$imm))),
+ (VSHUFPSrri VR128:$src1, VR128:$src2, imm:$imm)>;
+ // vector_shuffle v1, v2 <4, 5, 2, 3> using SHUFPSrri (we prefer movsd, but
+ // fall back to this for SSE1)
+ def : Pat<(v4f32 (movlp:$src3 VR128:$src1, (v4f32 VR128:$src2))),
+ (VSHUFPSrri VR128:$src2, VR128:$src1,
+ (SHUFFLE_get_shuf_imm VR128:$src3))>;
+ // Special unary SHUFPSrri case.
+ def : Pat<(v4f32 (pshufd:$src3 VR128:$src1, (undef))),
+ (VSHUFPSrri VR128:$src1, VR128:$src1,
+ (SHUFFLE_get_shuf_imm VR128:$src3))>;
+ // Special binary v4i32 shuffle cases with SHUFPS.
+ def : Pat<(v4i32 (shufp:$src3 VR128:$src1, (v4i32 VR128:$src2))),
+ (VSHUFPSrri VR128:$src1, VR128:$src2,
+ (SHUFFLE_get_shuf_imm VR128:$src3))>;
+ def : Pat<(v4i32 (shufp:$src3 VR128:$src1,
+ (bc_v4i32 (memopv2i64 addr:$src2)))),
+ (VSHUFPSrmi VR128:$src1, addr:$src2,
+ (SHUFFLE_get_shuf_imm VR128:$src3))>;
+ // Special unary SHUFPDrri cases.
+ def : Pat<(v2i64 (pshufd:$src3 VR128:$src1, (undef))),
+ (VSHUFPDrri VR128:$src1, VR128:$src1,
+ (SHUFFLE_get_shuf_imm VR128:$src3))>;
+ def : Pat<(v2f64 (pshufd:$src3 VR128:$src1, (undef))),
+ (VSHUFPDrri VR128:$src1, VR128:$src1,
+ (SHUFFLE_get_shuf_imm VR128:$src3))>;
+ // Special binary v2i64 shuffle cases using SHUFPDrri.
+ def : Pat<(v2i64 (shufp:$src3 VR128:$src1, VR128:$src2)),
+ (VSHUFPDrri VR128:$src1, VR128:$src2,
+ (SHUFFLE_get_shuf_imm VR128:$src3))>;
+
+ def : Pat<(v2f64 (X86Shufps VR128:$src1,
+ (memopv2f64 addr:$src2), (i8 imm:$imm))),
+ (VSHUFPDrmi VR128:$src1, addr:$src2, imm:$imm)>;
+ def : Pat<(v2i64 (X86Shufpd VR128:$src1, VR128:$src2, (i8 imm:$imm))),
+ (VSHUFPDrri VR128:$src1, VR128:$src2, imm:$imm)>;
+ def : Pat<(v2f64 (X86Shufpd VR128:$src1, VR128:$src2, (i8 imm:$imm))),
+ (VSHUFPDrri VR128:$src1, VR128:$src2, imm:$imm)>;
+
+ // 256-bit patterns
+ def : Pat<(v8i32 (X86Shufps VR256:$src1, VR256:$src2, (i8 imm:$imm))),
+ (VSHUFPSYrri VR256:$src1, VR256:$src2, imm:$imm)>;
+ def : Pat<(v8i32 (X86Shufps VR256:$src1,
+ (bc_v8i32 (memopv4i64 addr:$src2)), (i8 imm:$imm))),
+ (VSHUFPSYrmi VR256:$src1, addr:$src2, imm:$imm)>;
+
+ def : Pat<(v8f32 (X86Shufps VR256:$src1, VR256:$src2, (i8 imm:$imm))),
+ (VSHUFPSYrri VR256:$src1, VR256:$src2, imm:$imm)>;
+ def : Pat<(v8f32 (X86Shufps VR256:$src1,
+ (memopv8f32 addr:$src2), (i8 imm:$imm))),
+ (VSHUFPSYrmi VR256:$src1, addr:$src2, imm:$imm)>;
+
+ def : Pat<(v4i64 (X86Shufpd VR256:$src1, VR256:$src2, (i8 imm:$imm))),
+ (VSHUFPDYrri VR256:$src1, VR256:$src2, imm:$imm)>;
+ def : Pat<(v4i64 (X86Shufpd VR256:$src1,
+ (memopv4i64 addr:$src2), (i8 imm:$imm))),
+ (VSHUFPDYrmi VR256:$src1, addr:$src2, imm:$imm)>;
+
+ def : Pat<(v4f64 (X86Shufpd VR256:$src1, VR256:$src2, (i8 imm:$imm))),
+ (VSHUFPDYrri VR256:$src1, VR256:$src2, imm:$imm)>;
+ def : Pat<(v4f64 (X86Shufpd VR256:$src1,
+ (memopv4f64 addr:$src2), (i8 imm:$imm))),
+ (VSHUFPDYrmi VR256:$src1, addr:$src2, imm:$imm)>;
+}
+
//===----------------------------------------------------------------------===//
// SSE 1 & 2 - Unpack Instructions
//===----------------------------------------------------------------------===//
@@ -1316,29 +2372,29 @@ multiclass sse12_unpack_interleave<bits<8> opc, PatFrag OpNode, ValueType vt,
let AddedComplexity = 10 in {
defm VUNPCKHPS: sse12_unpack_interleave<0x15, unpckh, v4f32, memopv4f32,
VR128, f128mem, "unpckhps\t{$src2, $src1, $dst|$dst, $src1, $src2}",
- SSEPackedSingle>, VEX_4V;
+ SSEPackedSingle>, TB, VEX_4V;
defm VUNPCKHPD: sse12_unpack_interleave<0x15, unpckh, v2f64, memopv2f64,
VR128, f128mem, "unpckhpd\t{$src2, $src1, $dst|$dst, $src1, $src2}",
- SSEPackedDouble>, OpSize, VEX_4V;
+ SSEPackedDouble>, TB, OpSize, VEX_4V;
defm VUNPCKLPS: sse12_unpack_interleave<0x14, unpckl, v4f32, memopv4f32,
VR128, f128mem, "unpcklps\t{$src2, $src1, $dst|$dst, $src1, $src2}",
- SSEPackedSingle>, VEX_4V;
+ SSEPackedSingle>, TB, VEX_4V;
defm VUNPCKLPD: sse12_unpack_interleave<0x14, unpckl, v2f64, memopv2f64,
VR128, f128mem, "unpcklpd\t{$src2, $src1, $dst|$dst, $src1, $src2}",
- SSEPackedDouble>, OpSize, VEX_4V;
+ SSEPackedDouble>, TB, OpSize, VEX_4V;
defm VUNPCKHPSY: sse12_unpack_interleave<0x15, unpckh, v8f32, memopv8f32,
VR256, f256mem, "unpckhps\t{$src2, $src1, $dst|$dst, $src1, $src2}",
- SSEPackedSingle>, VEX_4V;
+ SSEPackedSingle>, TB, VEX_4V;
defm VUNPCKHPDY: sse12_unpack_interleave<0x15, unpckh, v4f64, memopv4f64,
VR256, f256mem, "unpckhpd\t{$src2, $src1, $dst|$dst, $src1, $src2}",
- SSEPackedDouble>, OpSize, VEX_4V;
+ SSEPackedDouble>, TB, OpSize, VEX_4V;
defm VUNPCKLPSY: sse12_unpack_interleave<0x14, unpckl, v8f32, memopv8f32,
VR256, f256mem, "unpcklps\t{$src2, $src1, $dst|$dst, $src1, $src2}",
- SSEPackedSingle>, VEX_4V;
+ SSEPackedSingle>, TB, VEX_4V;
defm VUNPCKLPDY: sse12_unpack_interleave<0x14, unpckl, v4f64, memopv4f64,
VR256, f256mem, "unpcklpd\t{$src2, $src1, $dst|$dst, $src1, $src2}",
- SSEPackedDouble>, OpSize, VEX_4V;
+ SSEPackedDouble>, TB, OpSize, VEX_4V;
let Constraints = "$src1 = $dst" in {
defm UNPCKHPS: sse12_unpack_interleave<0x15, unpckh, v4f32, memopv4f32,
@@ -1356,6 +2412,103 @@ let AddedComplexity = 10 in {
} // Constraints = "$src1 = $dst"
} // AddedComplexity
+let Predicates = [HasSSE1] in {
+ def : Pat<(v4f32 (X86Unpcklps VR128:$src1, (memopv4f32 addr:$src2))),
+ (UNPCKLPSrm VR128:$src1, addr:$src2)>;
+ def : Pat<(v4f32 (X86Unpcklps VR128:$src1, VR128:$src2)),
+ (UNPCKLPSrr VR128:$src1, VR128:$src2)>;
+ def : Pat<(v4f32 (X86Unpckhps VR128:$src1, (memopv4f32 addr:$src2))),
+ (UNPCKHPSrm VR128:$src1, addr:$src2)>;
+ def : Pat<(v4f32 (X86Unpckhps VR128:$src1, VR128:$src2)),
+ (UNPCKHPSrr VR128:$src1, VR128:$src2)>;
+}
+
+let Predicates = [HasSSE2] in {
+ def : Pat<(v2f64 (X86Unpcklpd VR128:$src1, (memopv2f64 addr:$src2))),
+ (UNPCKLPDrm VR128:$src1, addr:$src2)>;
+ def : Pat<(v2f64 (X86Unpcklpd VR128:$src1, VR128:$src2)),
+ (UNPCKLPDrr VR128:$src1, VR128:$src2)>;
+ def : Pat<(v2f64 (X86Unpckhpd VR128:$src1, (memopv2f64 addr:$src2))),
+ (UNPCKHPDrm VR128:$src1, addr:$src2)>;
+ def : Pat<(v2f64 (X86Unpckhpd VR128:$src1, VR128:$src2)),
+ (UNPCKHPDrr VR128:$src1, VR128:$src2)>;
+
+ // FIXME: Instead of X86Movddup, there should be a X86Unpcklpd here, the
+ // problem is during lowering, where it's not possible to recognize the load
+ // fold cause it has two uses through a bitcast. One use disappears at isel
+ // time and the fold opportunity reappears.
+ def : Pat<(v2f64 (X86Movddup VR128:$src)),
+ (UNPCKLPDrr VR128:$src, VR128:$src)>;
+
+ let AddedComplexity = 10 in
+ def : Pat<(splat_lo (v2f64 VR128:$src), (undef)),
+ (UNPCKLPDrr VR128:$src, VR128:$src)>;
+}
+
+let Predicates = [HasAVX] in {
+ def : Pat<(v4f32 (X86Unpcklps VR128:$src1, (memopv4f32 addr:$src2))),
+ (VUNPCKLPSrm VR128:$src1, addr:$src2)>;
+ def : Pat<(v4f32 (X86Unpcklps VR128:$src1, VR128:$src2)),
+ (VUNPCKLPSrr VR128:$src1, VR128:$src2)>;
+ def : Pat<(v4f32 (X86Unpckhps VR128:$src1, (memopv4f32 addr:$src2))),
+ (VUNPCKHPSrm VR128:$src1, addr:$src2)>;
+ def : Pat<(v4f32 (X86Unpckhps VR128:$src1, VR128:$src2)),
+ (VUNPCKHPSrr VR128:$src1, VR128:$src2)>;
+
+ def : Pat<(v8f32 (X86Unpcklpsy VR256:$src1, (memopv8f32 addr:$src2))),
+ (VUNPCKLPSYrm VR256:$src1, addr:$src2)>;
+ def : Pat<(v8f32 (X86Unpcklpsy VR256:$src1, VR256:$src2)),
+ (VUNPCKLPSYrr VR256:$src1, VR256:$src2)>;
+ def : Pat<(v8i32 (X86Unpcklpsy VR256:$src1, VR256:$src2)),
+ (VUNPCKLPSYrr VR256:$src1, VR256:$src2)>;
+ def : Pat<(v8i32 (X86Unpcklpsy VR256:$src1, (memopv8i32 addr:$src2))),
+ (VUNPCKLPSYrm VR256:$src1, addr:$src2)>;
+ def : Pat<(v8f32 (X86Unpckhpsy VR256:$src1, (memopv8f32 addr:$src2))),
+ (VUNPCKHPSYrm VR256:$src1, addr:$src2)>;
+ def : Pat<(v8f32 (X86Unpckhpsy VR256:$src1, VR256:$src2)),
+ (VUNPCKHPSYrr VR256:$src1, VR256:$src2)>;
+ def : Pat<(v8i32 (X86Unpckhpsy VR256:$src1, (memopv8i32 addr:$src2))),
+ (VUNPCKHPSYrm VR256:$src1, addr:$src2)>;
+ def : Pat<(v8i32 (X86Unpckhpsy VR256:$src1, VR256:$src2)),
+ (VUNPCKHPSYrr VR256:$src1, VR256:$src2)>;
+
+ def : Pat<(v2f64 (X86Unpcklpd VR128:$src1, (memopv2f64 addr:$src2))),
+ (VUNPCKLPDrm VR128:$src1, addr:$src2)>;
+ def : Pat<(v2f64 (X86Unpcklpd VR128:$src1, VR128:$src2)),
+ (VUNPCKLPDrr VR128:$src1, VR128:$src2)>;
+ def : Pat<(v2f64 (X86Unpckhpd VR128:$src1, (memopv2f64 addr:$src2))),
+ (VUNPCKHPDrm VR128:$src1, addr:$src2)>;
+ def : Pat<(v2f64 (X86Unpckhpd VR128:$src1, VR128:$src2)),
+ (VUNPCKHPDrr VR128:$src1, VR128:$src2)>;
+
+ def : Pat<(v4f64 (X86Unpcklpdy VR256:$src1, (memopv4f64 addr:$src2))),
+ (VUNPCKLPDYrm VR256:$src1, addr:$src2)>;
+ def : Pat<(v4f64 (X86Unpcklpdy VR256:$src1, VR256:$src2)),
+ (VUNPCKLPDYrr VR256:$src1, VR256:$src2)>;
+ def : Pat<(v4i64 (X86Unpcklpdy VR256:$src1, (memopv4i64 addr:$src2))),
+ (VUNPCKLPDYrm VR256:$src1, addr:$src2)>;
+ def : Pat<(v4i64 (X86Unpcklpdy VR256:$src1, VR256:$src2)),
+ (VUNPCKLPDYrr VR256:$src1, VR256:$src2)>;
+ def : Pat<(v4f64 (X86Unpckhpdy VR256:$src1, (memopv4f64 addr:$src2))),
+ (VUNPCKHPDYrm VR256:$src1, addr:$src2)>;
+ def : Pat<(v4f64 (X86Unpckhpdy VR256:$src1, VR256:$src2)),
+ (VUNPCKHPDYrr VR256:$src1, VR256:$src2)>;
+ def : Pat<(v4i64 (X86Unpckhpdy VR256:$src1, (memopv4i64 addr:$src2))),
+ (VUNPCKHPDYrm VR256:$src1, addr:$src2)>;
+ def : Pat<(v4i64 (X86Unpckhpdy VR256:$src1, VR256:$src2)),
+ (VUNPCKHPDYrr VR256:$src1, VR256:$src2)>;
+
+ // FIXME: Instead of X86Movddup, there should be a X86Unpcklpd here, the
+ // problem is during lowering, where it's not possible to recognize the load
+ // fold cause it has two uses through a bitcast. One use disappears at isel
+ // time and the fold opportunity reappears.
+ def : Pat<(v2f64 (X86Movddup VR128:$src)),
+ (VUNPCKLPDrr VR128:$src, VR128:$src)>;
+ let AddedComplexity = 10 in
+ def : Pat<(splat_lo (v2f64 VR128:$src), (undef)),
+ (VUNPCKLPDrr VR128:$src, VR128:$src)>;
+}
+
//===----------------------------------------------------------------------===//
// SSE 1 & 2 - Extract Floating-Point Sign mask
//===----------------------------------------------------------------------===//
@@ -1370,91 +2523,60 @@ multiclass sse12_extr_sign_mask<RegisterClass RC, Intrinsic Int, string asm,
!strconcat(asm, "\t{$src, $dst|$dst, $src}"), [], d>, REX_W;
}
-// Mask creation
-defm VMOVMSKPS : sse12_extr_sign_mask<VR128, int_x86_sse_movmsk_ps,
- "movmskps", SSEPackedSingle>, VEX;
-defm VMOVMSKPD : sse12_extr_sign_mask<VR128, int_x86_sse2_movmsk_pd,
- "movmskpd", SSEPackedDouble>, OpSize,
- VEX;
-defm VMOVMSKPSY : sse12_extr_sign_mask<VR256, int_x86_avx_movmsk_ps_256,
- "movmskps", SSEPackedSingle>, VEX;
-defm VMOVMSKPDY : sse12_extr_sign_mask<VR256, int_x86_avx_movmsk_pd_256,
- "movmskpd", SSEPackedDouble>, OpSize,
- VEX;
defm MOVMSKPS : sse12_extr_sign_mask<VR128, int_x86_sse_movmsk_ps, "movmskps",
SSEPackedSingle>, TB;
defm MOVMSKPD : sse12_extr_sign_mask<VR128, int_x86_sse2_movmsk_pd, "movmskpd",
SSEPackedDouble>, TB, OpSize;
-// X86fgetsign
-def MOVMSKPDrr32_alt : PI<0x50, MRMSrcReg, (outs GR32:$dst), (ins FR64:$src),
- "movmskpd\t{$src, $dst|$dst, $src}",
- [(set GR32:$dst, (X86fgetsign FR64:$src))], SSEPackedDouble>, TB, OpSize;
-def MOVMSKPDrr64_alt : PI<0x50, MRMSrcReg, (outs GR64:$dst), (ins FR64:$src),
- "movmskpd\t{$src, $dst|$dst, $src}",
- [(set GR64:$dst, (X86fgetsign FR64:$src))], SSEPackedDouble>, TB, OpSize;
-def MOVMSKPSrr32_alt : PI<0x50, MRMSrcReg, (outs GR32:$dst), (ins FR32:$src),
- "movmskps\t{$src, $dst|$dst, $src}",
- [(set GR32:$dst, (X86fgetsign FR32:$src))], SSEPackedSingle>, TB;
-def MOVMSKPSrr64_alt : PI<0x50, MRMSrcReg, (outs GR64:$dst), (ins FR32:$src),
- "movmskps\t{$src, $dst|$dst, $src}",
- [(set GR64:$dst, (X86fgetsign FR32:$src))], SSEPackedSingle>, TB;
-
-// Assembler Only
-def VMOVMSKPSr64r : PI<0x50, MRMSrcReg, (outs GR64:$dst), (ins VR128:$src),
- "movmskps\t{$src, $dst|$dst, $src}", [], SSEPackedSingle>, VEX;
-def VMOVMSKPDr64r : PI<0x50, MRMSrcReg, (outs GR64:$dst), (ins VR128:$src),
- "movmskpd\t{$src, $dst|$dst, $src}", [], SSEPackedDouble>, OpSize,
- VEX;
-def VMOVMSKPSYr64r : PI<0x50, MRMSrcReg, (outs GR64:$dst), (ins VR256:$src),
- "movmskps\t{$src, $dst|$dst, $src}", [], SSEPackedSingle>, VEX;
-def VMOVMSKPDYr64r : PI<0x50, MRMSrcReg, (outs GR64:$dst), (ins VR256:$src),
- "movmskpd\t{$src, $dst|$dst, $src}", [], SSEPackedDouble>, OpSize,
- VEX;
-
-//===----------------------------------------------------------------------===//
-// SSE 1 & 2 - Misc aliasing of packed SSE 1 & 2 instructions
-//===----------------------------------------------------------------------===//
-
-// Aliases of packed SSE1 & SSE2 instructions for scalar use. These all have
-// names that start with 'Fs'.
-
-// Alias instructions that map fld0 to pxor for sse.
-let isReMaterializable = 1, isAsCheapAsAMove = 1, isCodeGenOnly = 1,
- canFoldAsLoad = 1 in {
- // FIXME: Set encoding to pseudo!
-def FsFLD0SS : I<0xEF, MRMInitReg, (outs FR32:$dst), (ins), "",
- [(set FR32:$dst, fp32imm0)]>,
- Requires<[HasSSE1]>, TB, OpSize;
-def FsFLD0SD : I<0xEF, MRMInitReg, (outs FR64:$dst), (ins), "",
- [(set FR64:$dst, fpimm0)]>,
- Requires<[HasSSE2]>, TB, OpSize;
-def VFsFLD0SS : I<0xEF, MRMInitReg, (outs FR32:$dst), (ins), "",
- [(set FR32:$dst, fp32imm0)]>,
- Requires<[HasAVX]>, TB, OpSize, VEX_4V;
-def VFsFLD0SD : I<0xEF, MRMInitReg, (outs FR64:$dst), (ins), "",
- [(set FR64:$dst, fpimm0)]>,
- Requires<[HasAVX]>, TB, OpSize, VEX_4V;
-}
-
-// Alias instruction to do FR32 or FR64 reg-to-reg copy using movaps. Upper
-// bits are disregarded.
-let neverHasSideEffects = 1 in {
-def FsMOVAPSrr : PSI<0x28, MRMSrcReg, (outs FR32:$dst), (ins FR32:$src),
- "movaps\t{$src, $dst|$dst, $src}", []>;
-def FsMOVAPDrr : PDI<0x28, MRMSrcReg, (outs FR64:$dst), (ins FR64:$src),
- "movapd\t{$src, $dst|$dst, $src}", []>;
-}
+def : Pat<(i32 (X86fgetsign FR32:$src)),
+ (MOVMSKPSrr32 (INSERT_SUBREG (v4f32 (IMPLICIT_DEF)), FR32:$src,
+ sub_ss))>, Requires<[HasSSE1]>;
+def : Pat<(i64 (X86fgetsign FR32:$src)),
+ (MOVMSKPSrr64 (INSERT_SUBREG (v4f32 (IMPLICIT_DEF)), FR32:$src,
+ sub_ss))>, Requires<[HasSSE1]>;
+def : Pat<(i32 (X86fgetsign FR64:$src)),
+ (MOVMSKPDrr32 (INSERT_SUBREG (v2f64 (IMPLICIT_DEF)), FR64:$src,
+ sub_sd))>, Requires<[HasSSE2]>;
+def : Pat<(i64 (X86fgetsign FR64:$src)),
+ (MOVMSKPDrr64 (INSERT_SUBREG (v2f64 (IMPLICIT_DEF)), FR64:$src,
+ sub_sd))>, Requires<[HasSSE2]>;
-// Alias instruction to load FR32 or FR64 from f128mem using movaps. Upper
-// bits are disregarded.
-let canFoldAsLoad = 1, isReMaterializable = 1 in {
-def FsMOVAPSrm : PSI<0x28, MRMSrcMem, (outs FR32:$dst), (ins f128mem:$src),
- "movaps\t{$src, $dst|$dst, $src}",
- [(set FR32:$dst, (alignedloadfsf32 addr:$src))]>;
-def FsMOVAPDrm : PDI<0x28, MRMSrcMem, (outs FR64:$dst), (ins f128mem:$src),
- "movapd\t{$src, $dst|$dst, $src}",
- [(set FR64:$dst, (alignedloadfsf64 addr:$src))]>;
+let Predicates = [HasAVX] in {
+ defm VMOVMSKPS : sse12_extr_sign_mask<VR128, int_x86_sse_movmsk_ps,
+ "movmskps", SSEPackedSingle>, TB, VEX;
+ defm VMOVMSKPD : sse12_extr_sign_mask<VR128, int_x86_sse2_movmsk_pd,
+ "movmskpd", SSEPackedDouble>, TB,
+ OpSize, VEX;
+ defm VMOVMSKPSY : sse12_extr_sign_mask<VR256, int_x86_avx_movmsk_ps_256,
+ "movmskps", SSEPackedSingle>, TB, VEX;
+ defm VMOVMSKPDY : sse12_extr_sign_mask<VR256, int_x86_avx_movmsk_pd_256,
+ "movmskpd", SSEPackedDouble>, TB,
+ OpSize, VEX;
+
+ def : Pat<(i32 (X86fgetsign FR32:$src)),
+ (VMOVMSKPSrr32 (INSERT_SUBREG (v4f32 (IMPLICIT_DEF)), FR32:$src,
+ sub_ss))>;
+ def : Pat<(i64 (X86fgetsign FR32:$src)),
+ (VMOVMSKPSrr64 (INSERT_SUBREG (v4f32 (IMPLICIT_DEF)), FR32:$src,
+ sub_ss))>;
+ def : Pat<(i32 (X86fgetsign FR64:$src)),
+ (VMOVMSKPDrr32 (INSERT_SUBREG (v2f64 (IMPLICIT_DEF)), FR64:$src,
+ sub_sd))>;
+ def : Pat<(i64 (X86fgetsign FR64:$src)),
+ (VMOVMSKPDrr64 (INSERT_SUBREG (v2f64 (IMPLICIT_DEF)), FR64:$src,
+ sub_sd))>;
+
+ // Assembler Only
+ def VMOVMSKPSr64r : PI<0x50, MRMSrcReg, (outs GR64:$dst), (ins VR128:$src),
+ "movmskps\t{$src, $dst|$dst, $src}", [], SSEPackedSingle>, TB, VEX;
+ def VMOVMSKPDr64r : PI<0x50, MRMSrcReg, (outs GR64:$dst), (ins VR128:$src),
+ "movmskpd\t{$src, $dst|$dst, $src}", [], SSEPackedDouble>, TB,
+ OpSize, VEX;
+ def VMOVMSKPSYr64r : PI<0x50, MRMSrcReg, (outs GR64:$dst), (ins VR256:$src),
+ "movmskps\t{$src, $dst|$dst, $src}", [], SSEPackedSingle>, TB, VEX;
+ def VMOVMSKPDYr64r : PI<0x50, MRMSrcReg, (outs GR64:$dst), (ins VR256:$src),
+ "movmskpd\t{$src, $dst|$dst, $src}", [], SSEPackedDouble>, TB,
+ OpSize, VEX;
}
//===----------------------------------------------------------------------===//
@@ -1466,10 +2588,10 @@ def FsMOVAPDrm : PDI<0x28, MRMSrcMem, (outs FR64:$dst), (ins f128mem:$src),
multiclass sse12_fp_alias_pack_logical<bits<8> opc, string OpcodeStr,
SDNode OpNode> {
defm V#NAME#PS : sse12_fp_packed<opc, !strconcat(OpcodeStr, "ps"), OpNode,
- FR32, f32, f128mem, memopfsf32, SSEPackedSingle, 0>, VEX_4V;
+ FR32, f32, f128mem, memopfsf32, SSEPackedSingle, 0>, TB, VEX_4V;
defm V#NAME#PD : sse12_fp_packed<opc, !strconcat(OpcodeStr, "pd"), OpNode,
- FR64, f64, f128mem, memopfsf64, SSEPackedDouble, 0>, OpSize, VEX_4V;
+ FR64, f64, f128mem, memopfsf64, SSEPackedDouble, 0>, TB, OpSize, VEX_4V;
let Constraints = "$src1 = $dst" in {
defm PS : sse12_fp_packed<opc, !strconcat(OpcodeStr, "ps"), OpNode, FR32,
@@ -1494,21 +2616,22 @@ let neverHasSideEffects = 1, Pattern = []<dag>, isCommutable = 0 in
///
multiclass sse12_fp_packed_logical<bits<8> opc, string OpcodeStr,
SDNode OpNode> {
- let Pattern = []<dag> in {
- defm V#NAME#PS : sse12_fp_packed_logical_rm<opc, VR128, SSEPackedSingle,
- !strconcat(OpcodeStr, "ps"), f128mem,
- [(set VR128:$dst, (v2i64 (OpNode VR128:$src1, VR128:$src2)))],
- [(set VR128:$dst, (OpNode (bc_v2i64 (v4f32 VR128:$src1)),
- (memopv2i64 addr:$src2)))], 0>, VEX_4V;
-
- defm V#NAME#PD : sse12_fp_packed_logical_rm<opc, VR128, SSEPackedDouble,
- !strconcat(OpcodeStr, "pd"), f128mem,
- [(set VR128:$dst, (OpNode (bc_v2i64 (v2f64 VR128:$src1)),
- (bc_v2i64 (v2f64 VR128:$src2))))],
- [(set VR128:$dst, (OpNode (bc_v2i64 (v2f64 VR128:$src1)),
- (memopv2i64 addr:$src2)))], 0>,
- OpSize, VEX_4V;
- }
+ // In AVX no need to add a pattern for 128-bit logical rr ps, because they
+ // are all promoted to v2i64, and the patterns are covered by the int
+ // version. This is needed in SSE only, because v2i64 isn't supported on
+ // SSE1, but only on SSE2.
+ defm V#NAME#PS : sse12_fp_packed_logical_rm<opc, VR128, SSEPackedSingle,
+ !strconcat(OpcodeStr, "ps"), f128mem, [],
+ [(set VR128:$dst, (OpNode (bc_v2i64 (v4f32 VR128:$src1)),
+ (memopv2i64 addr:$src2)))], 0>, TB, VEX_4V;
+
+ defm V#NAME#PD : sse12_fp_packed_logical_rm<opc, VR128, SSEPackedDouble,
+ !strconcat(OpcodeStr, "pd"), f128mem,
+ [(set VR128:$dst, (OpNode (bc_v2i64 (v2f64 VR128:$src1)),
+ (bc_v2i64 (v2f64 VR128:$src2))))],
+ [(set VR128:$dst, (OpNode (bc_v2i64 (v2f64 VR128:$src1)),
+ (memopv2i64 addr:$src2)))], 0>,
+ TB, OpSize, VEX_4V;
let Constraints = "$src1 = $dst" in {
defm PS : sse12_fp_packed_logical_rm<opc, VR128, SSEPackedSingle,
!strconcat(OpcodeStr, "ps"), f128mem,
@@ -1533,7 +2656,7 @@ multiclass sse12_fp_packed_logical_y<bits<8> opc, string OpcodeStr,
!strconcat(OpcodeStr, "ps"), f256mem,
[(set VR256:$dst, (v4i64 (OpNode VR256:$src1, VR256:$src2)))],
[(set VR256:$dst, (OpNode (bc_v4i64 (v8f32 VR256:$src1)),
- (memopv4i64 addr:$src2)))], 0>, VEX_4V;
+ (memopv4i64 addr:$src2)))], 0>, TB, VEX_4V;
defm PDY : sse12_fp_packed_logical_rm<opc, VR256, SSEPackedDouble,
!strconcat(OpcodeStr, "pd"), f256mem,
@@ -1541,7 +2664,7 @@ multiclass sse12_fp_packed_logical_y<bits<8> opc, string OpcodeStr,
(bc_v4i64 (v4f64 VR256:$src2))))],
[(set VR256:$dst, (OpNode (bc_v4i64 (v4f64 VR256:$src1)),
(memopv4i64 addr:$src2)))], 0>,
- OpSize, VEX_4V;
+ TB, OpSize, VEX_4V;
}
// AVX 256-bit packed logical ops forms
@@ -1632,32 +2755,32 @@ multiclass basic_sse12_fp_binop_p_y_int<bits<8> opc, string OpcodeStr> {
// Binary Arithmetic instructions
defm VADD : basic_sse12_fp_binop_s<0x58, "add", fadd, 0>,
- basic_sse12_fp_binop_s_int<0x58, "add", 0>,
- basic_sse12_fp_binop_p<0x58, "add", fadd, 0>,
+ basic_sse12_fp_binop_s_int<0x58, "add", 0>, VEX_4V, VEX_LIG;
+defm VADD : basic_sse12_fp_binop_p<0x58, "add", fadd, 0>,
basic_sse12_fp_binop_p_y<0x58, "add", fadd>, VEX_4V;
defm VMUL : basic_sse12_fp_binop_s<0x59, "mul", fmul, 0>,
- basic_sse12_fp_binop_s_int<0x59, "mul", 0>,
- basic_sse12_fp_binop_p<0x59, "mul", fmul, 0>,
+ basic_sse12_fp_binop_s_int<0x59, "mul", 0>, VEX_4V, VEX_LIG;
+defm VMUL : basic_sse12_fp_binop_p<0x59, "mul", fmul, 0>,
basic_sse12_fp_binop_p_y<0x59, "mul", fmul>, VEX_4V;
let isCommutable = 0 in {
defm VSUB : basic_sse12_fp_binop_s<0x5C, "sub", fsub, 0>,
- basic_sse12_fp_binop_s_int<0x5C, "sub", 0>,
- basic_sse12_fp_binop_p<0x5C, "sub", fsub, 0>,
+ basic_sse12_fp_binop_s_int<0x5C, "sub", 0>, VEX_4V, VEX_LIG;
+ defm VSUB : basic_sse12_fp_binop_p<0x5C, "sub", fsub, 0>,
basic_sse12_fp_binop_p_y<0x5C, "sub", fsub>, VEX_4V;
defm VDIV : basic_sse12_fp_binop_s<0x5E, "div", fdiv, 0>,
- basic_sse12_fp_binop_s_int<0x5E, "div", 0>,
- basic_sse12_fp_binop_p<0x5E, "div", fdiv, 0>,
+ basic_sse12_fp_binop_s_int<0x5E, "div", 0>, VEX_4V, VEX_LIG;
+ defm VDIV : basic_sse12_fp_binop_p<0x5E, "div", fdiv, 0>,
basic_sse12_fp_binop_p_y<0x5E, "div", fdiv>, VEX_4V;
defm VMAX : basic_sse12_fp_binop_s<0x5F, "max", X86fmax, 0>,
- basic_sse12_fp_binop_s_int<0x5F, "max", 0>,
- basic_sse12_fp_binop_p<0x5F, "max", X86fmax, 0>,
+ basic_sse12_fp_binop_s_int<0x5F, "max", 0>, VEX_4V, VEX_LIG;
+ defm VMAX : basic_sse12_fp_binop_p<0x5F, "max", X86fmax, 0>,
basic_sse12_fp_binop_p_int<0x5F, "max", 0>,
basic_sse12_fp_binop_p_y<0x5F, "max", X86fmax>,
basic_sse12_fp_binop_p_y_int<0x5F, "max">, VEX_4V;
defm VMIN : basic_sse12_fp_binop_s<0x5D, "min", X86fmin, 0>,
- basic_sse12_fp_binop_s_int<0x5D, "min", 0>,
- basic_sse12_fp_binop_p<0x5D, "min", X86fmin, 0>,
+ basic_sse12_fp_binop_s_int<0x5D, "min", 0>, VEX_4V, VEX_LIG;
+ defm VMIN : basic_sse12_fp_binop_p<0x5D, "min", X86fmin, 0>,
basic_sse12_fp_binop_p_int<0x5D, "min", 0>,
basic_sse12_fp_binop_p_y_int<0x5D, "min">,
basic_sse12_fp_binop_p_y<0x5D, "min", X86fmin>, VEX_4V;
@@ -1720,23 +2843,18 @@ multiclass sse1_fp_unop_s<bits<8> opc, string OpcodeStr,
}
/// sse1_fp_unop_s_avx - AVX SSE1 unops in scalar form.
-multiclass sse1_fp_unop_s_avx<bits<8> opc, string OpcodeStr,
- SDNode OpNode, Intrinsic F32Int> {
+multiclass sse1_fp_unop_s_avx<bits<8> opc, string OpcodeStr> {
def SSr : SSI<opc, MRMSrcReg, (outs FR32:$dst), (ins FR32:$src1, FR32:$src2),
!strconcat(OpcodeStr,
"ss\t{$src2, $src1, $dst|$dst, $src1, $src2}"), []>;
- def SSm : I<opc, MRMSrcMem, (outs FR32:$dst), (ins FR32:$src1, f32mem:$src2),
- !strconcat(OpcodeStr,
- "ss\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
- []>, XS, Requires<[HasAVX, OptForSize]>;
- def SSr_Int : SSI<opc, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
+ let mayLoad = 1 in
+ def SSm : SSI<opc, MRMSrcMem, (outs FR32:$dst), (ins FR32:$src1,f32mem:$src2),
!strconcat(OpcodeStr,
- "ss\t{$src, $dst, $dst|$dst, $dst, $src}"),
- [(set VR128:$dst, (F32Int VR128:$src))]>;
- def SSm_Int : SSI<opc, MRMSrcMem, (outs VR128:$dst), (ins ssmem:$src),
+ "ss\t{$src2, $src1, $dst|$dst, $src1, $src2}"), []>;
+ def SSm_Int : SSI<opc, MRMSrcMem, (outs VR128:$dst),
+ (ins ssmem:$src1, VR128:$src2),
!strconcat(OpcodeStr,
- "ss\t{$src, $dst, $dst|$dst, $dst, $src}"),
- [(set VR128:$dst, (F32Int sse_load_f32:$src))]>;
+ "ss\t{$src2, $src1, $dst|$dst, $src1, $src2}"), []>;
}
/// sse1_fp_unop_p - SSE1 unops in packed form.
@@ -1801,21 +2919,17 @@ multiclass sse2_fp_unop_s<bits<8> opc, string OpcodeStr,
}
/// sse2_fp_unop_s_avx - AVX SSE2 unops in scalar form.
-multiclass sse2_fp_unop_s_avx<bits<8> opc, string OpcodeStr,
- SDNode OpNode, Intrinsic F64Int> {
+multiclass sse2_fp_unop_s_avx<bits<8> opc, string OpcodeStr> {
def SDr : SDI<opc, MRMSrcReg, (outs FR64:$dst), (ins FR64:$src1, FR64:$src2),
!strconcat(OpcodeStr,
"sd\t{$src2, $src1, $dst|$dst, $src1, $src2}"), []>;
- def SDm : SDI<opc, MRMSrcMem, (outs FR64:$dst),
- (ins FR64:$src1, f64mem:$src2),
+ def SDm : SDI<opc, MRMSrcMem, (outs FR64:$dst), (ins FR64:$src1,f64mem:$src2),
+ !strconcat(OpcodeStr,
+ "sd\t{$src2, $src1, $dst|$dst, $src1, $src2}"), []>;
+ def SDm_Int : SDI<opc, MRMSrcMem, (outs VR128:$dst),
+ (ins VR128:$src1, sdmem:$src2),
!strconcat(OpcodeStr,
"sd\t{$src2, $src1, $dst|$dst, $src1, $src2}"), []>;
- def SDr_Int : SDI<opc, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
- !strconcat(OpcodeStr, "sd\t{$src, $dst, $dst|$dst, $dst, $src}"),
- [(set VR128:$dst, (F64Int VR128:$src))]>;
- def SDm_Int : SDI<opc, MRMSrcMem, (outs VR128:$dst), (ins sdmem:$src),
- !strconcat(OpcodeStr, "sd\t{$src, $dst, $dst|$dst, $dst, $src}"),
- [(set VR128:$dst, (F64Int sse_load_f64:$src))]>;
}
/// sse2_fp_unop_p - SSE2 unops in vector forms.
@@ -1863,9 +2977,8 @@ multiclass sse2_fp_unop_p_y_int<bits<8> opc, string OpcodeStr,
let Predicates = [HasAVX] in {
// Square root.
- defm VSQRT : sse1_fp_unop_s_avx<0x51, "vsqrt", fsqrt, int_x86_sse_sqrt_ss>,
- sse2_fp_unop_s_avx<0x51, "vsqrt", fsqrt, int_x86_sse2_sqrt_sd>,
- VEX_4V;
+ defm VSQRT : sse1_fp_unop_s_avx<0x51, "vsqrt">,
+ sse2_fp_unop_s_avx<0x51, "vsqrt">, VEX_4V, VEX_LIG;
defm VSQRT : sse1_fp_unop_p<0x51, "vsqrt", fsqrt>,
sse2_fp_unop_p<0x51, "vsqrt", fsqrt>,
@@ -1879,21 +2992,76 @@ let Predicates = [HasAVX] in {
// Reciprocal approximations. Note that these typically require refinement
// in order to obtain suitable precision.
- defm VRSQRT : sse1_fp_unop_s_avx<0x52, "vrsqrt", X86frsqrt,
- int_x86_sse_rsqrt_ss>, VEX_4V;
+ defm VRSQRT : sse1_fp_unop_s_avx<0x52, "vrsqrt">, VEX_4V, VEX_LIG;
defm VRSQRT : sse1_fp_unop_p<0x52, "vrsqrt", X86frsqrt>,
sse1_fp_unop_p_y<0x52, "vrsqrt", X86frsqrt>,
sse1_fp_unop_p_y_int<0x52, "vrsqrt", int_x86_avx_rsqrt_ps_256>,
sse1_fp_unop_p_int<0x52, "vrsqrt", int_x86_sse_rsqrt_ps>, VEX;
- defm VRCP : sse1_fp_unop_s_avx<0x53, "vrcp", X86frcp, int_x86_sse_rcp_ss>,
- VEX_4V;
+ defm VRCP : sse1_fp_unop_s_avx<0x53, "vrcp">, VEX_4V, VEX_LIG;
defm VRCP : sse1_fp_unop_p<0x53, "vrcp", X86frcp>,
sse1_fp_unop_p_y<0x53, "vrcp", X86frcp>,
sse1_fp_unop_p_y_int<0x53, "vrcp", int_x86_avx_rcp_ps_256>,
sse1_fp_unop_p_int<0x53, "vrcp", int_x86_sse_rcp_ps>, VEX;
}
+def : Pat<(f32 (fsqrt FR32:$src)),
+ (VSQRTSSr (f32 (IMPLICIT_DEF)), FR32:$src)>, Requires<[HasAVX]>;
+def : Pat<(f32 (fsqrt (load addr:$src))),
+ (VSQRTSSm (f32 (IMPLICIT_DEF)), addr:$src)>,
+ Requires<[HasAVX, OptForSize]>;
+def : Pat<(f64 (fsqrt FR64:$src)),
+ (VSQRTSDr (f64 (IMPLICIT_DEF)), FR64:$src)>, Requires<[HasAVX]>;
+def : Pat<(f64 (fsqrt (load addr:$src))),
+ (VSQRTSDm (f64 (IMPLICIT_DEF)), addr:$src)>,
+ Requires<[HasAVX, OptForSize]>;
+
+def : Pat<(f32 (X86frsqrt FR32:$src)),
+ (VRSQRTSSr (f32 (IMPLICIT_DEF)), FR32:$src)>, Requires<[HasAVX]>;
+def : Pat<(f32 (X86frsqrt (load addr:$src))),
+ (VRSQRTSSm (f32 (IMPLICIT_DEF)), addr:$src)>,
+ Requires<[HasAVX, OptForSize]>;
+
+def : Pat<(f32 (X86frcp FR32:$src)),
+ (VRCPSSr (f32 (IMPLICIT_DEF)), FR32:$src)>, Requires<[HasAVX]>;
+def : Pat<(f32 (X86frcp (load addr:$src))),
+ (VRCPSSm (f32 (IMPLICIT_DEF)), addr:$src)>,
+ Requires<[HasAVX, OptForSize]>;
+
+let Predicates = [HasAVX] in {
+ def : Pat<(int_x86_sse_sqrt_ss VR128:$src),
+ (INSERT_SUBREG (v4f32 (IMPLICIT_DEF)),
+ (VSQRTSSr (f32 (IMPLICIT_DEF)),
+ (EXTRACT_SUBREG (v4f32 VR128:$src), sub_ss)),
+ sub_ss)>;
+ def : Pat<(int_x86_sse_sqrt_ss sse_load_f32:$src),
+ (VSQRTSSm_Int (v4f32 (IMPLICIT_DEF)), sse_load_f32:$src)>;
+
+ def : Pat<(int_x86_sse2_sqrt_sd VR128:$src),
+ (INSERT_SUBREG (v2f64 (IMPLICIT_DEF)),
+ (VSQRTSDr (f64 (IMPLICIT_DEF)),
+ (EXTRACT_SUBREG (v2f64 VR128:$src), sub_sd)),
+ sub_sd)>;
+ def : Pat<(int_x86_sse2_sqrt_sd sse_load_f64:$src),
+ (VSQRTSDm_Int (v2f64 (IMPLICIT_DEF)), sse_load_f64:$src)>;
+
+ def : Pat<(int_x86_sse_rsqrt_ss VR128:$src),
+ (INSERT_SUBREG (v4f32 (IMPLICIT_DEF)),
+ (VRSQRTSSr (f32 (IMPLICIT_DEF)),
+ (EXTRACT_SUBREG (v4f32 VR128:$src), sub_ss)),
+ sub_ss)>;
+ def : Pat<(int_x86_sse_rsqrt_ss sse_load_f32:$src),
+ (VRSQRTSSm_Int (v4f32 (IMPLICIT_DEF)), sse_load_f32:$src)>;
+
+ def : Pat<(int_x86_sse_rcp_ss VR128:$src),
+ (INSERT_SUBREG (v4f32 (IMPLICIT_DEF)),
+ (VRCPSSr (f32 (IMPLICIT_DEF)),
+ (EXTRACT_SUBREG (v4f32 VR128:$src), sub_ss)),
+ sub_ss)>;
+ def : Pat<(int_x86_sse_rcp_ss sse_load_f32:$src),
+ (VRCPSSm_Int (v4f32 (IMPLICIT_DEF)), sse_load_f32:$src)>;
+}
+
// Square root.
defm SQRT : sse1_fp_unop_s<0x51, "sqrt", fsqrt, int_x86_sse_sqrt_ss>,
sse1_fp_unop_p<0x51, "sqrt", fsqrt>,
@@ -1992,7 +3160,7 @@ def MOVNTDQmr : PDI<0xE7, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src),
[(alignednontemporalstore (v4f32 VR128:$src), addr:$dst)]>;
def : Pat<(alignednontemporalstore (v2i64 VR128:$src), addr:$dst),
- (MOVNTDQmr addr:$dst, VR128:$src)>;
+ (MOVNTDQmr addr:$dst, VR128:$src)>, Requires<[HasSSE2]>;
// There is no AVX form for instructions below this point
def MOVNTImr : I<0xC3, MRMDestMem, (outs), (ins i32mem:$dst, GR32:$src),
@@ -2006,7 +3174,7 @@ def MOVNTI_64mr : RI<0xC3, MRMDestMem, (outs), (ins i64mem:$dst, GR64:$src),
}
//===----------------------------------------------------------------------===//
-// SSE 1 & 2 - Misc Instructions (No AVX form)
+// SSE 1 & 2 - Prefetch and memory fence
//===----------------------------------------------------------------------===//
// Prefetch intrinsic.
@@ -2019,66 +3187,26 @@ def PREFETCHT2 : PSI<0x18, MRM3m, (outs), (ins i8mem:$src),
def PREFETCHNTA : PSI<0x18, MRM0m, (outs), (ins i8mem:$src),
"prefetchnta\t$src", [(prefetch addr:$src, imm, (i32 0), (i32 1))]>;
-// Load, store, and memory fence
-def SFENCE : I<0xAE, MRM_F8, (outs), (ins), "sfence", [(int_x86_sse_sfence)]>,
- TB, Requires<[HasSSE1]>;
-def : Pat<(X86SFence), (SFENCE)>;
-
-// Alias instructions that map zero vector to pxor / xorp* for sse.
-// We set canFoldAsLoad because this can be converted to a constant-pool
-// load of an all-zeros value if folding it would be beneficial.
-// FIXME: Change encoding to pseudo! This is blocked right now by the x86
-// JIT implementation, it does not expand the instructions below like
-// X86MCInstLower does.
-let isReMaterializable = 1, isAsCheapAsAMove = 1, canFoldAsLoad = 1,
- isCodeGenOnly = 1 in {
-def V_SET0PS : PSI<0x57, MRMInitReg, (outs VR128:$dst), (ins), "",
- [(set VR128:$dst, (v4f32 immAllZerosV))]>;
-def V_SET0PD : PDI<0x57, MRMInitReg, (outs VR128:$dst), (ins), "",
- [(set VR128:$dst, (v2f64 immAllZerosV))]>;
-let ExeDomain = SSEPackedInt in
-def V_SET0PI : PDI<0xEF, MRMInitReg, (outs VR128:$dst), (ins), "",
- [(set VR128:$dst, (v4i32 immAllZerosV))]>;
-}
-
-// The same as done above but for AVX. The 128-bit versions are the
-// same, but re-encoded. The 256-bit does not support PI version, and
-// doesn't need it because on sandy bridge the register is set to zero
-// at the rename stage without using any execution unit, so SET0PSY
-// and SET0PDY can be used for vector int instructions without penalty
-// FIXME: Change encoding to pseudo! This is blocked right now by the x86
-// JIT implementatioan, it does not expand the instructions below like
-// X86MCInstLower does.
-let isReMaterializable = 1, isAsCheapAsAMove = 1, canFoldAsLoad = 1,
- isCodeGenOnly = 1, Predicates = [HasAVX] in {
-def AVX_SET0PS : PSI<0x57, MRMInitReg, (outs VR128:$dst), (ins), "",
- [(set VR128:$dst, (v4f32 immAllZerosV))]>, VEX_4V;
-def AVX_SET0PD : PDI<0x57, MRMInitReg, (outs VR128:$dst), (ins), "",
- [(set VR128:$dst, (v2f64 immAllZerosV))]>, VEX_4V;
-def AVX_SET0PSY : PSI<0x57, MRMInitReg, (outs VR256:$dst), (ins), "",
- [(set VR256:$dst, (v8f32 immAllZerosV))]>, VEX_4V;
-def AVX_SET0PDY : PDI<0x57, MRMInitReg, (outs VR256:$dst), (ins), "",
- [(set VR256:$dst, (v4f64 immAllZerosV))]>, VEX_4V;
-let ExeDomain = SSEPackedInt in
-def AVX_SET0PI : PDI<0xEF, MRMInitReg, (outs VR128:$dst), (ins), "",
- [(set VR128:$dst, (v4i32 immAllZerosV))]>;
-}
+// Flush cache
+def CLFLUSH : I<0xAE, MRM7m, (outs), (ins i8mem:$src),
+ "clflush\t$src", [(int_x86_sse2_clflush addr:$src)]>,
+ TB, Requires<[HasSSE2]>;
-def : Pat<(v2i64 immAllZerosV), (V_SET0PI)>;
-def : Pat<(v8i16 immAllZerosV), (V_SET0PI)>;
-def : Pat<(v16i8 immAllZerosV), (V_SET0PI)>;
+// Pause. This "instruction" is encoded as "rep; nop", so even though it
+// was introduced with SSE2, it's backward compatible.
+def PAUSE : I<0x90, RawFrm, (outs), (ins), "pause", []>, REP;
-def : Pat<(f32 (vector_extract (v4f32 VR128:$src), (iPTR 0))),
- (f32 (EXTRACT_SUBREG (v4f32 VR128:$src), sub_ss))>;
+// Load, store, and memory fence
+def SFENCE : I<0xAE, MRM_F8, (outs), (ins),
+ "sfence", [(int_x86_sse_sfence)]>, TB, Requires<[HasSSE1]>;
+def LFENCE : I<0xAE, MRM_E8, (outs), (ins),
+ "lfence", [(int_x86_sse2_lfence)]>, TB, Requires<[HasSSE2]>;
+def MFENCE : I<0xAE, MRM_F0, (outs), (ins),
+ "mfence", [(int_x86_sse2_mfence)]>, TB, Requires<[HasSSE2]>;
-// FIXME: According to the intel manual, DEST[127:64] <- SRC1[127:64], while
-// in the non-AVX version bits 127:64 aren't touched. Find a better way to
-// represent this instead of always zeroing SRC1. One possible solution is
-// to represent the instruction w/ something similar as the "$src1 = $dst"
-// constraint but without the tied operands.
-def : Pat<(extloadf32 addr:$src),
- (VCVTSS2SDrm (f32 (EXTRACT_SUBREG (AVX_SET0PS), sub_ss)), addr:$src)>,
- Requires<[HasAVX, OptForSpeed]>;
+def : Pat<(X86SFence), (SFENCE)>;
+def : Pat<(X86LFence), (LFENCE)>;
+def : Pat<(X86MFence), (MFENCE)>;
//===----------------------------------------------------------------------===//
// SSE 1 & 2 - Load/Store XCSR register
@@ -2106,10 +3234,22 @@ def VMOVDQArr : VPDI<0x6F, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
def VMOVDQAYrr : VPDI<0x6F, MRMSrcReg, (outs VR256:$dst), (ins VR256:$src),
"movdqa\t{$src, $dst|$dst, $src}", []>, VEX;
}
-def VMOVDQUrr : VPDI<0x6F, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
- "movdqu\t{$src, $dst|$dst, $src}", []>, XS, VEX;
-def VMOVDQUYrr : VPDI<0x6F, MRMSrcReg, (outs VR256:$dst), (ins VR256:$src),
- "movdqu\t{$src, $dst|$dst, $src}", []>, XS, VEX;
+def VMOVDQUrr : VSSI<0x6F, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
+ "movdqu\t{$src, $dst|$dst, $src}", []>, VEX;
+def VMOVDQUYrr : VSSI<0x6F, MRMSrcReg, (outs VR256:$dst), (ins VR256:$src),
+ "movdqu\t{$src, $dst|$dst, $src}", []>, VEX;
+
+// For Disassembler
+let isCodeGenOnly = 1 in {
+def VMOVDQArr_REV : VPDI<0x7F, MRMDestReg, (outs VR128:$dst), (ins VR128:$src),
+ "movdqa\t{$src, $dst|$dst, $src}", []>, VEX;
+def VMOVDQAYrr_REV : VPDI<0x7F, MRMDestReg, (outs VR256:$dst), (ins VR256:$src),
+ "movdqa\t{$src, $dst|$dst, $src}", []>, VEX;
+def VMOVDQUrr_REV : VSSI<0x7F, MRMDestReg, (outs VR128:$dst), (ins VR128:$src),
+ "movdqu\t{$src, $dst|$dst, $src}", []>, VEX;
+def VMOVDQUYrr_REV : VSSI<0x7F, MRMDestReg, (outs VR256:$dst), (ins VR256:$src),
+ "movdqu\t{$src, $dst|$dst, $src}", []>, VEX;
+}
let canFoldAsLoad = 1, mayLoad = 1 in {
def VMOVDQArm : VPDI<0x6F, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src),
@@ -2147,6 +3287,16 @@ def MOVDQUrr : I<0x6F, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
"movdqu\t{$src, $dst|$dst, $src}",
[]>, XS, Requires<[HasSSE2]>;
+// For Disassembler
+let isCodeGenOnly = 1 in {
+def MOVDQArr_REV : PDI<0x7F, MRMDestReg, (outs VR128:$dst), (ins VR128:$src),
+ "movdqa\t{$src, $dst|$dst, $src}", []>;
+
+def MOVDQUrr_REV : I<0x7F, MRMDestReg, (outs VR128:$dst), (ins VR128:$src),
+ "movdqu\t{$src, $dst|$dst, $src}",
+ []>, XS, Requires<[HasSSE2]>;
+}
+
let canFoldAsLoad = 1, mayLoad = 1 in {
def MOVDQArm : PDI<0x6F, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src),
"movdqa\t{$src, $dst|$dst, $src}",
@@ -2180,9 +3330,11 @@ def MOVDQUmr_Int : I<0x7F, MRMDestMem, (outs), (ins i128mem:$dst, VR128:$src),
} // ExeDomain = SSEPackedInt
-def : Pat<(int_x86_avx_loadu_dq_256 addr:$src), (VMOVDQUYrm addr:$src)>;
-def : Pat<(int_x86_avx_storeu_dq_256 addr:$dst, VR256:$src),
- (VMOVDQUYmr addr:$dst, VR256:$src)>;
+let Predicates = [HasAVX] in {
+ def : Pat<(int_x86_avx_loadu_dq_256 addr:$src), (VMOVDQUYrm addr:$src)>;
+ def : Pat<(int_x86_avx_storeu_dq_256 addr:$dst, VR256:$src),
+ (VMOVDQUYmr addr:$dst, VR256:$src)>;
+}
//===---------------------------------------------------------------------===//
// SSE2 - Packed Integer Arithmetic Instructions
@@ -2415,15 +3567,14 @@ let ExeDomain = SSEPackedInt in {
def VPANDNrr : PDI<0xDF, MRMSrcReg,
(outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
"vpandn\t{$src2, $src1, $dst|$dst, $src1, $src2}",
- [(set VR128:$dst, (v2i64 (and (vnot VR128:$src1),
- VR128:$src2)))]>, VEX_4V;
+ [(set VR128:$dst,
+ (v2i64 (X86andnp VR128:$src1, VR128:$src2)))]>,VEX_4V;
def VPANDNrm : PDI<0xDF, MRMSrcMem,
(outs VR128:$dst), (ins VR128:$src1, i128mem:$src2),
"vpandn\t{$src2, $src1, $dst|$dst, $src1, $src2}",
- [(set VR128:$dst, (v2i64 (and (vnot VR128:$src1),
- (memopv2i64 addr:$src2))))]>,
- VEX_4V;
+ [(set VR128:$dst, (X86andnp VR128:$src1,
+ (memopv2i64 addr:$src2)))]>, VEX_4V;
}
}
@@ -2527,6 +3678,32 @@ let Predicates = [HasAVX] in {
0>, VEX_4V;
defm VPCMPGTD : PDI_binop_rm_int<0x66, "vpcmpgtd", int_x86_sse2_pcmpgt_d, 0,
0>, VEX_4V;
+
+ def : Pat<(v16i8 (X86pcmpeqb VR128:$src1, VR128:$src2)),
+ (VPCMPEQBrr VR128:$src1, VR128:$src2)>;
+ def : Pat<(v16i8 (X86pcmpeqb VR128:$src1, (memop addr:$src2))),
+ (VPCMPEQBrm VR128:$src1, addr:$src2)>;
+ def : Pat<(v8i16 (X86pcmpeqw VR128:$src1, VR128:$src2)),
+ (VPCMPEQWrr VR128:$src1, VR128:$src2)>;
+ def : Pat<(v8i16 (X86pcmpeqw VR128:$src1, (memop addr:$src2))),
+ (VPCMPEQWrm VR128:$src1, addr:$src2)>;
+ def : Pat<(v4i32 (X86pcmpeqd VR128:$src1, VR128:$src2)),
+ (VPCMPEQDrr VR128:$src1, VR128:$src2)>;
+ def : Pat<(v4i32 (X86pcmpeqd VR128:$src1, (memop addr:$src2))),
+ (VPCMPEQDrm VR128:$src1, addr:$src2)>;
+
+ def : Pat<(v16i8 (X86pcmpgtb VR128:$src1, VR128:$src2)),
+ (VPCMPGTBrr VR128:$src1, VR128:$src2)>;
+ def : Pat<(v16i8 (X86pcmpgtb VR128:$src1, (memop addr:$src2))),
+ (VPCMPGTBrm VR128:$src1, addr:$src2)>;
+ def : Pat<(v8i16 (X86pcmpgtw VR128:$src1, VR128:$src2)),
+ (VPCMPGTWrr VR128:$src1, VR128:$src2)>;
+ def : Pat<(v8i16 (X86pcmpgtw VR128:$src1, (memop addr:$src2))),
+ (VPCMPGTWrm VR128:$src1, addr:$src2)>;
+ def : Pat<(v4i32 (X86pcmpgtd VR128:$src1, VR128:$src2)),
+ (VPCMPGTDrr VR128:$src1, VR128:$src2)>;
+ def : Pat<(v4i32 (X86pcmpgtd VR128:$src1, (memop addr:$src2))),
+ (VPCMPGTDrm VR128:$src1, addr:$src2)>;
}
let Constraints = "$src1 = $dst" in {
@@ -2538,31 +3715,33 @@ let Constraints = "$src1 = $dst" in {
defm PCMPGTD : PDI_binop_rm_int<0x66, "pcmpgtd", int_x86_sse2_pcmpgt_d>;
} // Constraints = "$src1 = $dst"
-def : Pat<(v16i8 (X86pcmpeqb VR128:$src1, VR128:$src2)),
- (PCMPEQBrr VR128:$src1, VR128:$src2)>;
-def : Pat<(v16i8 (X86pcmpeqb VR128:$src1, (memop addr:$src2))),
- (PCMPEQBrm VR128:$src1, addr:$src2)>;
-def : Pat<(v8i16 (X86pcmpeqw VR128:$src1, VR128:$src2)),
- (PCMPEQWrr VR128:$src1, VR128:$src2)>;
-def : Pat<(v8i16 (X86pcmpeqw VR128:$src1, (memop addr:$src2))),
- (PCMPEQWrm VR128:$src1, addr:$src2)>;
-def : Pat<(v4i32 (X86pcmpeqd VR128:$src1, VR128:$src2)),
- (PCMPEQDrr VR128:$src1, VR128:$src2)>;
-def : Pat<(v4i32 (X86pcmpeqd VR128:$src1, (memop addr:$src2))),
- (PCMPEQDrm VR128:$src1, addr:$src2)>;
-
-def : Pat<(v16i8 (X86pcmpgtb VR128:$src1, VR128:$src2)),
- (PCMPGTBrr VR128:$src1, VR128:$src2)>;
-def : Pat<(v16i8 (X86pcmpgtb VR128:$src1, (memop addr:$src2))),
- (PCMPGTBrm VR128:$src1, addr:$src2)>;
-def : Pat<(v8i16 (X86pcmpgtw VR128:$src1, VR128:$src2)),
- (PCMPGTWrr VR128:$src1, VR128:$src2)>;
-def : Pat<(v8i16 (X86pcmpgtw VR128:$src1, (memop addr:$src2))),
- (PCMPGTWrm VR128:$src1, addr:$src2)>;
-def : Pat<(v4i32 (X86pcmpgtd VR128:$src1, VR128:$src2)),
- (PCMPGTDrr VR128:$src1, VR128:$src2)>;
-def : Pat<(v4i32 (X86pcmpgtd VR128:$src1, (memop addr:$src2))),
- (PCMPGTDrm VR128:$src1, addr:$src2)>;
+let Predicates = [HasSSE2] in {
+ def : Pat<(v16i8 (X86pcmpeqb VR128:$src1, VR128:$src2)),
+ (PCMPEQBrr VR128:$src1, VR128:$src2)>;
+ def : Pat<(v16i8 (X86pcmpeqb VR128:$src1, (memop addr:$src2))),
+ (PCMPEQBrm VR128:$src1, addr:$src2)>;
+ def : Pat<(v8i16 (X86pcmpeqw VR128:$src1, VR128:$src2)),
+ (PCMPEQWrr VR128:$src1, VR128:$src2)>;
+ def : Pat<(v8i16 (X86pcmpeqw VR128:$src1, (memop addr:$src2))),
+ (PCMPEQWrm VR128:$src1, addr:$src2)>;
+ def : Pat<(v4i32 (X86pcmpeqd VR128:$src1, VR128:$src2)),
+ (PCMPEQDrr VR128:$src1, VR128:$src2)>;
+ def : Pat<(v4i32 (X86pcmpeqd VR128:$src1, (memop addr:$src2))),
+ (PCMPEQDrm VR128:$src1, addr:$src2)>;
+
+ def : Pat<(v16i8 (X86pcmpgtb VR128:$src1, VR128:$src2)),
+ (PCMPGTBrr VR128:$src1, VR128:$src2)>;
+ def : Pat<(v16i8 (X86pcmpgtb VR128:$src1, (memop addr:$src2))),
+ (PCMPGTBrm VR128:$src1, addr:$src2)>;
+ def : Pat<(v8i16 (X86pcmpgtw VR128:$src1, VR128:$src2)),
+ (PCMPGTWrr VR128:$src1, VR128:$src2)>;
+ def : Pat<(v8i16 (X86pcmpgtw VR128:$src1, (memop addr:$src2))),
+ (PCMPGTWrm VR128:$src1, addr:$src2)>;
+ def : Pat<(v4i32 (X86pcmpgtd VR128:$src1, VR128:$src2)),
+ (PCMPGTDrr VR128:$src1, VR128:$src2)>;
+ def : Pat<(v4i32 (X86pcmpgtd VR128:$src1, (memop addr:$src2))),
+ (PCMPGTDrm VR128:$src1, addr:$src2)>;
+}
//===---------------------------------------------------------------------===//
// SSE2 - Packed Integer Pack Instructions
@@ -2608,7 +3787,7 @@ def mi : Ii8<0x70, MRMSrcMem,
let Predicates = [HasAVX] in {
let AddedComplexity = 5 in
- defm VPSHUFD : sse2_pshuffle<"vpshufd", v4i32, pshufd, bc_v4i32>, OpSize,
+ defm VPSHUFD : sse2_pshuffle<"vpshufd", v4i32, pshufd, bc_v4i32>, TB, OpSize,
VEX;
// SSE2 with ImmT == Imm8 and XS prefix.
@@ -2618,6 +3797,34 @@ let Predicates = [HasAVX] in {
// SSE2 with ImmT == Imm8 and XD prefix.
defm VPSHUFLW : sse2_pshuffle<"vpshuflw", v8i16, pshuflw, bc_v8i16>, XD,
VEX;
+
+ let AddedComplexity = 5 in
+ def : Pat<(v4f32 (pshufd:$src2 VR128:$src1, (undef))),
+ (VPSHUFDri VR128:$src1, (SHUFFLE_get_shuf_imm VR128:$src2))>;
+ // Unary v4f32 shuffle with VPSHUF* in order to fold a load.
+ def : Pat<(pshufd:$src2 (bc_v4i32 (memopv4f32 addr:$src1)), (undef)),
+ (VPSHUFDmi addr:$src1, (SHUFFLE_get_shuf_imm VR128:$src2))>;
+
+ def : Pat<(v4i32 (X86PShufd (bc_v4i32 (memopv2i64 addr:$src1)),
+ (i8 imm:$imm))),
+ (VPSHUFDmi addr:$src1, imm:$imm)>;
+ def : Pat<(v4i32 (X86PShufd (bc_v4i32 (memopv4f32 addr:$src1)),
+ (i8 imm:$imm))),
+ (VPSHUFDmi addr:$src1, imm:$imm)>;
+ def : Pat<(v4f32 (X86PShufd VR128:$src1, (i8 imm:$imm))),
+ (VPSHUFDri VR128:$src1, imm:$imm)>;
+ def : Pat<(v4i32 (X86PShufd VR128:$src1, (i8 imm:$imm))),
+ (VPSHUFDri VR128:$src1, imm:$imm)>;
+ def : Pat<(v8i16 (X86PShufhw VR128:$src, (i8 imm:$imm))),
+ (VPSHUFHWri VR128:$src, imm:$imm)>;
+ def : Pat<(v8i16 (X86PShufhw (bc_v8i16 (memopv2i64 addr:$src)),
+ (i8 imm:$imm))),
+ (VPSHUFHWmi addr:$src, imm:$imm)>;
+ def : Pat<(v8i16 (X86PShuflw VR128:$src, (i8 imm:$imm))),
+ (VPSHUFLWri VR128:$src, imm:$imm)>;
+ def : Pat<(v8i16 (X86PShuflw (bc_v8i16 (memopv2i64 addr:$src)),
+ (i8 imm:$imm))),
+ (VPSHUFLWmi addr:$src, imm:$imm)>;
}
let Predicates = [HasSSE2] in {
@@ -2629,6 +3836,34 @@ let Predicates = [HasSSE2] in {
// SSE2 with ImmT == Imm8 and XD prefix.
defm PSHUFLW : sse2_pshuffle<"pshuflw", v8i16, pshuflw, bc_v8i16>, XD;
+
+ let AddedComplexity = 5 in
+ def : Pat<(v4f32 (pshufd:$src2 VR128:$src1, (undef))),
+ (PSHUFDri VR128:$src1, (SHUFFLE_get_shuf_imm VR128:$src2))>;
+ // Unary v4f32 shuffle with PSHUF* in order to fold a load.
+ def : Pat<(pshufd:$src2 (bc_v4i32 (memopv4f32 addr:$src1)), (undef)),
+ (PSHUFDmi addr:$src1, (SHUFFLE_get_shuf_imm VR128:$src2))>;
+
+ def : Pat<(v4i32 (X86PShufd (bc_v4i32 (memopv2i64 addr:$src1)),
+ (i8 imm:$imm))),
+ (PSHUFDmi addr:$src1, imm:$imm)>;
+ def : Pat<(v4i32 (X86PShufd (bc_v4i32 (memopv4f32 addr:$src1)),
+ (i8 imm:$imm))),
+ (PSHUFDmi addr:$src1, imm:$imm)>;
+ def : Pat<(v4f32 (X86PShufd VR128:$src1, (i8 imm:$imm))),
+ (PSHUFDri VR128:$src1, imm:$imm)>;
+ def : Pat<(v4i32 (X86PShufd VR128:$src1, (i8 imm:$imm))),
+ (PSHUFDri VR128:$src1, imm:$imm)>;
+ def : Pat<(v8i16 (X86PShufhw VR128:$src, (i8 imm:$imm))),
+ (PSHUFHWri VR128:$src, imm:$imm)>;
+ def : Pat<(v8i16 (X86PShufhw (bc_v8i16 (memopv2i64 addr:$src)),
+ (i8 imm:$imm))),
+ (PSHUFHWmi addr:$src, imm:$imm)>;
+ def : Pat<(v8i16 (X86PShuflw VR128:$src, (i8 imm:$imm))),
+ (PSHUFLWri VR128:$src, imm:$imm)>;
+ def : Pat<(v8i16 (X86PShuflw (bc_v8i16 (memopv2i64 addr:$src)),
+ (i8 imm:$imm))),
+ (PSHUFLWmi addr:$src, imm:$imm)>;
}
//===---------------------------------------------------------------------===//
@@ -2637,71 +3872,69 @@ let Predicates = [HasSSE2] in {
let ExeDomain = SSEPackedInt in {
multiclass sse2_unpack<bits<8> opc, string OpcodeStr, ValueType vt,
- PatFrag unp_frag, PatFrag bc_frag, bit Is2Addr = 1> {
+ SDNode OpNode, PatFrag bc_frag, bit Is2Addr = 1> {
def rr : PDI<opc, MRMSrcReg,
(outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
!if(Is2Addr,
!strconcat(OpcodeStr,"\t{$src2, $dst|$dst, $src2}"),
!strconcat(OpcodeStr,"\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
- [(set VR128:$dst, (vt (unp_frag VR128:$src1, VR128:$src2)))]>;
+ [(set VR128:$dst, (vt (OpNode VR128:$src1, VR128:$src2)))]>;
def rm : PDI<opc, MRMSrcMem,
(outs VR128:$dst), (ins VR128:$src1, i128mem:$src2),
!if(Is2Addr,
!strconcat(OpcodeStr,"\t{$src2, $dst|$dst, $src2}"),
!strconcat(OpcodeStr,"\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
- [(set VR128:$dst, (unp_frag VR128:$src1,
+ [(set VR128:$dst, (OpNode VR128:$src1,
(bc_frag (memopv2i64
addr:$src2))))]>;
}
let Predicates = [HasAVX] in {
- defm VPUNPCKLBW : sse2_unpack<0x60, "vpunpcklbw", v16i8, unpckl, bc_v16i8,
- 0>, VEX_4V;
- defm VPUNPCKLWD : sse2_unpack<0x61, "vpunpcklwd", v8i16, unpckl, bc_v8i16,
- 0>, VEX_4V;
- defm VPUNPCKLDQ : sse2_unpack<0x62, "vpunpckldq", v4i32, unpckl, bc_v4i32,
- 0>, VEX_4V;
+ defm VPUNPCKLBW : sse2_unpack<0x60, "vpunpcklbw", v16i8, X86Punpcklbw,
+ bc_v16i8, 0>, VEX_4V;
+ defm VPUNPCKLWD : sse2_unpack<0x61, "vpunpcklwd", v8i16, X86Punpcklwd,
+ bc_v8i16, 0>, VEX_4V;
+ defm VPUNPCKLDQ : sse2_unpack<0x62, "vpunpckldq", v4i32, X86Punpckldq,
+ bc_v4i32, 0>, VEX_4V;
/// FIXME: we could eliminate this and use sse2_unpack instead if tblgen
/// knew to collapse (bitconvert VT to VT) into its operand.
def VPUNPCKLQDQrr : PDI<0x6C, MRMSrcReg,
- (outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
- "vpunpcklqdq\t{$src2, $src1, $dst|$dst, $src1, $src2}",
- [(set VR128:$dst,
- (v2i64 (unpckl VR128:$src1, VR128:$src2)))]>, VEX_4V;
+ (outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
+ "vpunpcklqdq\t{$src2, $src1, $dst|$dst, $src1, $src2}",
+ [(set VR128:$dst, (v2i64 (X86Punpcklqdq VR128:$src1,
+ VR128:$src2)))]>, VEX_4V;
def VPUNPCKLQDQrm : PDI<0x6C, MRMSrcMem,
- (outs VR128:$dst), (ins VR128:$src1, i128mem:$src2),
- "vpunpcklqdq\t{$src2, $src1, $dst|$dst, $src1, $src2}",
- [(set VR128:$dst,
- (v2i64 (unpckl VR128:$src1,
- (memopv2i64 addr:$src2))))]>, VEX_4V;
-
- defm VPUNPCKHBW : sse2_unpack<0x68, "vpunpckhbw", v16i8, unpckh, bc_v16i8,
- 0>, VEX_4V;
- defm VPUNPCKHWD : sse2_unpack<0x69, "vpunpckhwd", v8i16, unpckh, bc_v8i16,
- 0>, VEX_4V;
- defm VPUNPCKHDQ : sse2_unpack<0x6A, "vpunpckhdq", v4i32, unpckh, bc_v4i32,
- 0>, VEX_4V;
+ (outs VR128:$dst), (ins VR128:$src1, i128mem:$src2),
+ "vpunpcklqdq\t{$src2, $src1, $dst|$dst, $src1, $src2}",
+ [(set VR128:$dst, (v2i64 (X86Punpcklqdq VR128:$src1,
+ (memopv2i64 addr:$src2))))]>, VEX_4V;
+
+ defm VPUNPCKHBW : sse2_unpack<0x68, "vpunpckhbw", v16i8, X86Punpckhbw,
+ bc_v16i8, 0>, VEX_4V;
+ defm VPUNPCKHWD : sse2_unpack<0x69, "vpunpckhwd", v8i16, X86Punpckhwd,
+ bc_v8i16, 0>, VEX_4V;
+ defm VPUNPCKHDQ : sse2_unpack<0x6A, "vpunpckhdq", v4i32, X86Punpckhdq,
+ bc_v4i32, 0>, VEX_4V;
/// FIXME: we could eliminate this and use sse2_unpack instead if tblgen
/// knew to collapse (bitconvert VT to VT) into its operand.
def VPUNPCKHQDQrr : PDI<0x6D, MRMSrcReg,
- (outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
- "vpunpckhqdq\t{$src2, $src1, $dst|$dst, $src1, $src2}",
- [(set VR128:$dst,
- (v2i64 (unpckh VR128:$src1, VR128:$src2)))]>, VEX_4V;
+ (outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
+ "vpunpckhqdq\t{$src2, $src1, $dst|$dst, $src1, $src2}",
+ [(set VR128:$dst, (v2i64 (X86Punpckhqdq VR128:$src1,
+ VR128:$src2)))]>, VEX_4V;
def VPUNPCKHQDQrm : PDI<0x6D, MRMSrcMem,
- (outs VR128:$dst), (ins VR128:$src1, i128mem:$src2),
- "vpunpckhqdq\t{$src2, $src1, $dst|$dst, $src1, $src2}",
- [(set VR128:$dst,
- (v2i64 (unpckh VR128:$src1,
- (memopv2i64 addr:$src2))))]>, VEX_4V;
+ (outs VR128:$dst), (ins VR128:$src1, i128mem:$src2),
+ "vpunpckhqdq\t{$src2, $src1, $dst|$dst, $src1, $src2}",
+ [(set VR128:$dst, (v2i64 (X86Punpckhqdq VR128:$src1,
+ (memopv2i64 addr:$src2))))]>, VEX_4V;
}
let Constraints = "$src1 = $dst" in {
- defm PUNPCKLBW : sse2_unpack<0x60, "punpcklbw", v16i8, unpckl, bc_v16i8>;
- defm PUNPCKLWD : sse2_unpack<0x61, "punpcklwd", v8i16, unpckl, bc_v8i16>;
- defm PUNPCKLDQ : sse2_unpack<0x62, "punpckldq", v4i32, unpckl, bc_v4i32>;
+ defm PUNPCKLBW : sse2_unpack<0x60, "punpcklbw", v16i8, X86Punpcklbw, bc_v16i8>;
+ defm PUNPCKLWD : sse2_unpack<0x61, "punpcklwd", v8i16, X86Punpcklwd, bc_v8i16>;
+ defm PUNPCKLDQ : sse2_unpack<0x62, "punpckldq", v4i32, X86Punpckldq, bc_v4i32>;
/// FIXME: we could eliminate this and use sse2_unpack instead if tblgen
/// knew to collapse (bitconvert VT to VT) into its operand.
@@ -2709,17 +3942,17 @@ let Constraints = "$src1 = $dst" in {
(outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
"punpcklqdq\t{$src2, $dst|$dst, $src2}",
[(set VR128:$dst,
- (v2i64 (unpckl VR128:$src1, VR128:$src2)))]>;
+ (v2i64 (X86Punpcklqdq VR128:$src1, VR128:$src2)))]>;
def PUNPCKLQDQrm : PDI<0x6C, MRMSrcMem,
(outs VR128:$dst), (ins VR128:$src1, i128mem:$src2),
"punpcklqdq\t{$src2, $dst|$dst, $src2}",
[(set VR128:$dst,
- (v2i64 (unpckl VR128:$src1,
+ (v2i64 (X86Punpcklqdq VR128:$src1,
(memopv2i64 addr:$src2))))]>;
- defm PUNPCKHBW : sse2_unpack<0x68, "punpckhbw", v16i8, unpckh, bc_v16i8>;
- defm PUNPCKHWD : sse2_unpack<0x69, "punpckhwd", v8i16, unpckh, bc_v8i16>;
- defm PUNPCKHDQ : sse2_unpack<0x6A, "punpckhdq", v4i32, unpckh, bc_v4i32>;
+ defm PUNPCKHBW : sse2_unpack<0x68, "punpckhbw", v16i8, X86Punpckhbw, bc_v16i8>;
+ defm PUNPCKHWD : sse2_unpack<0x69, "punpckhwd", v8i16, X86Punpckhwd, bc_v8i16>;
+ defm PUNPCKHDQ : sse2_unpack<0x6A, "punpckhdq", v4i32, X86Punpckhdq, bc_v4i32>;
/// FIXME: we could eliminate this and use sse2_unpack instead if tblgen
/// knew to collapse (bitconvert VT to VT) into its operand.
@@ -2727,17 +3960,24 @@ let Constraints = "$src1 = $dst" in {
(outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
"punpckhqdq\t{$src2, $dst|$dst, $src2}",
[(set VR128:$dst,
- (v2i64 (unpckh VR128:$src1, VR128:$src2)))]>;
+ (v2i64 (X86Punpckhqdq VR128:$src1, VR128:$src2)))]>;
def PUNPCKHQDQrm : PDI<0x6D, MRMSrcMem,
(outs VR128:$dst), (ins VR128:$src1, i128mem:$src2),
"punpckhqdq\t{$src2, $dst|$dst, $src2}",
[(set VR128:$dst,
- (v2i64 (unpckh VR128:$src1,
+ (v2i64 (X86Punpckhqdq VR128:$src1,
(memopv2i64 addr:$src2))))]>;
}
-
} // ExeDomain = SSEPackedInt
+// Splat v2f64 / v2i64
+let AddedComplexity = 10 in {
+ def : Pat<(splat_lo (v2i64 VR128:$src), (undef)),
+ (PUNPCKLQDQrr VR128:$src, VR128:$src)>, Requires<[HasSSE2]>;
+ def : Pat<(splat_lo (v2i64 VR128:$src), (undef)),
+ (VPUNPCKLQDQrr VR128:$src, VR128:$src)>, Requires<[HasAVX]>;
+}
+
//===---------------------------------------------------------------------===//
// SSE2 - Packed Integer Extract and Insert
//===---------------------------------------------------------------------===//
@@ -2769,7 +4009,7 @@ def VPEXTRWri : Ii8<0xC5, MRMSrcReg,
(outs GR32:$dst), (ins VR128:$src1, i32i8imm:$src2),
"vpextrw\t{$src2, $src1, $dst|$dst, $src1, $src2}",
[(set GR32:$dst, (X86pextrw (v8i16 VR128:$src1),
- imm:$src2))]>, OpSize, VEX;
+ imm:$src2))]>, TB, OpSize, VEX;
def PEXTRWri : PDIi8<0xC5, MRMSrcReg,
(outs GR32:$dst), (ins VR128:$src1, i32i8imm:$src2),
"pextrw\t{$src2, $src1, $dst|$dst, $src1, $src2}",
@@ -2778,11 +4018,11 @@ def PEXTRWri : PDIi8<0xC5, MRMSrcReg,
// Insert
let Predicates = [HasAVX] in {
- defm VPINSRW : sse2_pinsrw<0>, OpSize, VEX_4V;
+ defm VPINSRW : sse2_pinsrw<0>, TB, OpSize, VEX_4V;
def VPINSRWrr64i : Ii8<0xC4, MRMSrcReg, (outs VR128:$dst),
(ins VR128:$src1, GR64:$src2, i32i8imm:$src3),
"vpinsrw\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
- []>, OpSize, VEX_4V;
+ []>, TB, OpSize, VEX_4V;
}
let Constraints = "$src1 = $dst" in
@@ -2839,7 +4079,9 @@ def MASKMOVDQU64 : PDI<0xF7, MRMSrcReg, (outs), (ins VR128:$src, VR128:$mask),
// SSE2 - Move Doubleword
//===---------------------------------------------------------------------===//
+//===---------------------------------------------------------------------===//
// Move Int Doubleword to Packed Double Int
+//
def VMOVDI2PDIrr : VPDI<0x6E, MRMSrcReg, (outs VR128:$dst), (ins GR32:$src),
"movd\t{$src, $dst|$dst, $src}",
[(set VR128:$dst,
@@ -2849,6 +4091,14 @@ def VMOVDI2PDIrm : VPDI<0x6E, MRMSrcMem, (outs VR128:$dst), (ins i32mem:$src),
[(set VR128:$dst,
(v4i32 (scalar_to_vector (loadi32 addr:$src))))]>,
VEX;
+def VMOV64toPQIrr : VRPDI<0x6E, MRMSrcReg, (outs VR128:$dst), (ins GR64:$src),
+ "mov{d|q}\t{$src, $dst|$dst, $src}",
+ [(set VR128:$dst,
+ (v2i64 (scalar_to_vector GR64:$src)))]>, VEX;
+def VMOV64toSDrr : VRPDI<0x6E, MRMSrcReg, (outs FR64:$dst), (ins GR64:$src),
+ "mov{d|q}\t{$src, $dst|$dst, $src}",
+ [(set FR64:$dst, (bitconvert GR64:$src))]>, VEX;
+
def MOVDI2PDIrr : PDI<0x6E, MRMSrcReg, (outs VR128:$dst), (ins GR32:$src),
"movd\t{$src, $dst|$dst, $src}",
[(set VR128:$dst,
@@ -2865,8 +4115,9 @@ def MOV64toSDrr : RPDI<0x6E, MRMSrcReg, (outs FR64:$dst), (ins GR64:$src),
"mov{d|q}\t{$src, $dst|$dst, $src}",
[(set FR64:$dst, (bitconvert GR64:$src))]>;
-
+//===---------------------------------------------------------------------===//
// Move Int Doubleword to Single Scalar
+//
def VMOVDI2SSrr : VPDI<0x6E, MRMSrcReg, (outs FR32:$dst), (ins GR32:$src),
"movd\t{$src, $dst|$dst, $src}",
[(set FR32:$dst, (bitconvert GR32:$src))]>, VEX;
@@ -2883,7 +4134,9 @@ def MOVDI2SSrm : PDI<0x6E, MRMSrcMem, (outs FR32:$dst), (ins i32mem:$src),
"movd\t{$src, $dst|$dst, $src}",
[(set FR32:$dst, (bitconvert (loadi32 addr:$src)))]>;
+//===---------------------------------------------------------------------===//
// Move Packed Doubleword Int to Packed Double Int
+//
def VMOVPDI2DIrr : VPDI<0x7E, MRMDestReg, (outs GR32:$dst), (ins VR128:$src),
"movd\t{$src, $dst|$dst, $src}",
[(set GR32:$dst, (vector_extract (v4i32 VR128:$src),
@@ -2902,22 +4155,48 @@ def MOVPDI2DImr : PDI<0x7E, MRMDestMem, (outs), (ins i32mem:$dst, VR128:$src),
[(store (i32 (vector_extract (v4i32 VR128:$src),
(iPTR 0))), addr:$dst)]>;
-def MOVPQIto64rr : RPDI<0x7E, MRMDestReg, (outs GR64:$dst), (ins VR128:$src),
+//===---------------------------------------------------------------------===//
+// Move Packed Doubleword Int first element to Doubleword Int
+//
+def VMOVPQIto64rr : I<0x7E, MRMDestReg, (outs GR64:$dst), (ins VR128:$src),
+ "mov{d|q}\t{$src, $dst|$dst, $src}",
+ [(set GR64:$dst, (vector_extract (v2i64 VR128:$src),
+ (iPTR 0)))]>,
+ TB, OpSize, VEX, VEX_W, Requires<[HasAVX, In64BitMode]>;
+
+def MOVPQIto64rr : RPDI<0x7E, MRMDestReg, (outs GR64:$dst), (ins VR128:$src),
+ "mov{d|q}\t{$src, $dst|$dst, $src}",
+ [(set GR64:$dst, (vector_extract (v2i64 VR128:$src),
+ (iPTR 0)))]>;
+
+//===---------------------------------------------------------------------===//
+// Bitcast FR64 <-> GR64
+//
+let Predicates = [HasAVX] in
+def VMOV64toSDrm : S3SI<0x7E, MRMSrcMem, (outs FR64:$dst), (ins i64mem:$src),
+ "vmovq\t{$src, $dst|$dst, $src}",
+ [(set FR64:$dst, (bitconvert (loadi64 addr:$src)))]>,
+ VEX;
+def VMOVSDto64rr : VRPDI<0x7E, MRMDestReg, (outs GR64:$dst), (ins FR64:$src),
"mov{d|q}\t{$src, $dst|$dst, $src}",
- [(set GR64:$dst, (vector_extract (v2i64 VR128:$src),
- (iPTR 0)))]>;
+ [(set GR64:$dst, (bitconvert FR64:$src))]>;
+def VMOVSDto64mr : VRPDI<0x7E, MRMDestMem, (outs), (ins i64mem:$dst, FR64:$src),
+ "movq\t{$src, $dst|$dst, $src}",
+ [(store (i64 (bitconvert FR64:$src)), addr:$dst)]>;
+
def MOV64toSDrm : S3SI<0x7E, MRMSrcMem, (outs FR64:$dst), (ins i64mem:$src),
"movq\t{$src, $dst|$dst, $src}",
[(set FR64:$dst, (bitconvert (loadi64 addr:$src)))]>;
+def MOVSDto64rr : RPDI<0x7E, MRMDestReg, (outs GR64:$dst), (ins FR64:$src),
+ "mov{d|q}\t{$src, $dst|$dst, $src}",
+ [(set GR64:$dst, (bitconvert FR64:$src))]>;
+def MOVSDto64mr : RPDI<0x7E, MRMDestMem, (outs), (ins i64mem:$dst, FR64:$src),
+ "movq\t{$src, $dst|$dst, $src}",
+ [(store (i64 (bitconvert FR64:$src)), addr:$dst)]>;
-def MOVSDto64rr : RPDI<0x7E, MRMDestReg, (outs GR64:$dst), (ins FR64:$src),
- "mov{d|q}\t{$src, $dst|$dst, $src}",
- [(set GR64:$dst, (bitconvert FR64:$src))]>;
-def MOVSDto64mr : RPDI<0x7E, MRMDestMem, (outs), (ins i64mem:$dst, FR64:$src),
- "movq\t{$src, $dst|$dst, $src}",
- [(store (i64 (bitconvert FR64:$src)), addr:$dst)]>;
-
+//===---------------------------------------------------------------------===//
// Move Scalar Single to Double Int
+//
def VMOVSS2DIrr : VPDI<0x7E, MRMDestReg, (outs GR32:$dst), (ins FR32:$src),
"movd\t{$src, $dst|$dst, $src}",
[(set GR32:$dst, (bitconvert FR32:$src))]>, VEX;
@@ -2931,7 +4210,9 @@ def MOVSS2DImr : PDI<0x7E, MRMDestMem, (outs), (ins i32mem:$dst, FR32:$src),
"movd\t{$src, $dst|$dst, $src}",
[(store (i32 (bitconvert FR32:$src)), addr:$dst)]>;
-// movd / movq to XMM register zero-extends
+//===---------------------------------------------------------------------===//
+// Patterns and instructions to describe movd/movq to XMM register zero-extends
+//
let AddedComplexity = 15 in {
def VMOVZDI2PDIrr : VPDI<0x6E, MRMSrcReg, (outs VR128:$dst), (ins GR32:$src),
"movd\t{$src, $dst|$dst, $src}",
@@ -2967,15 +4248,36 @@ def MOVZDI2PDIrm : PDI<0x6E, MRMSrcMem, (outs VR128:$dst), (ins i32mem:$src),
[(set VR128:$dst,
(v4i32 (X86vzmovl (v4i32 (scalar_to_vector
(loadi32 addr:$src))))))]>;
+}
-def : Pat<(v4i32 (X86vzmovl (loadv4i32 addr:$src))),
+let Predicates = [HasSSE2], AddedComplexity = 20 in {
+ def : Pat<(v4i32 (X86vzmovl (loadv4i32 addr:$src))),
(MOVZDI2PDIrm addr:$src)>;
-def : Pat<(v4i32 (X86vzmovl (bc_v4i32 (loadv4f32 addr:$src)))),
+ def : Pat<(v4i32 (X86vzmovl (bc_v4i32 (loadv4f32 addr:$src)))),
(MOVZDI2PDIrm addr:$src)>;
-def : Pat<(v4i32 (X86vzmovl (bc_v4i32 (loadv2i64 addr:$src)))),
+ def : Pat<(v4i32 (X86vzmovl (bc_v4i32 (loadv2i64 addr:$src)))),
(MOVZDI2PDIrm addr:$src)>;
}
+let Predicates = [HasAVX] in {
+ // AVX 128-bit movd/movq instruction write zeros in the high 128-bit part.
+ let AddedComplexity = 20 in {
+ def : Pat<(v4i32 (X86vzmovl (loadv4i32 addr:$src))),
+ (VMOVZDI2PDIrm addr:$src)>;
+ def : Pat<(v4i32 (X86vzmovl (bc_v4i32 (loadv4f32 addr:$src)))),
+ (VMOVZDI2PDIrm addr:$src)>;
+ def : Pat<(v4i32 (X86vzmovl (bc_v4i32 (loadv2i64 addr:$src)))),
+ (VMOVZDI2PDIrm addr:$src)>;
+ }
+ // Use regular 128-bit instructions to match 256-bit scalar_to_vec+zext.
+ def : Pat<(v8i32 (X86vzmovl (insert_subvector undef,
+ (v4i32 (scalar_to_vector GR32:$src)),(i32 0)))),
+ (SUBREG_TO_REG (i32 0), (VMOVZDI2PDIrr GR32:$src), sub_xmm)>;
+ def : Pat<(v4i64 (X86vzmovl (insert_subvector undef,
+ (v2i64 (scalar_to_vector GR64:$src)),(i32 0)))),
+ (SUBREG_TO_REG (i64 0), (VMOVZQI2PQIrr GR64:$src), sub_xmm)>;
+}
+
// These are the correct encodings of the instructions so that we know how to
// read correct assembly, even though we continue to emit the wrong ones for
// compatibility with Darwin's buggy assembler.
@@ -2996,7 +4298,9 @@ def : InstAlias<"movq\t{$src, $dst|$dst, $src}",
// SSE2 - Move Quadword
//===---------------------------------------------------------------------===//
+//===---------------------------------------------------------------------===//
// Move Quadword Int to Packed Quadword Int
+//
def VMOVQI2PQIrm : I<0x7E, MRMSrcMem, (outs VR128:$dst), (ins i64mem:$src),
"vmovq\t{$src, $dst|$dst, $src}",
[(set VR128:$dst,
@@ -3008,7 +4312,9 @@ def MOVQI2PQIrm : I<0x7E, MRMSrcMem, (outs VR128:$dst), (ins i64mem:$src),
(v2i64 (scalar_to_vector (loadi64 addr:$src))))]>, XS,
Requires<[HasSSE2]>; // SSE2 instruction with XS Prefix
+//===---------------------------------------------------------------------===//
// Move Packed Quadword Int to Quadword Int
+//
def VMOVPQI2QImr : VPDI<0xD6, MRMDestMem, (outs), (ins i64mem:$dst, VR128:$src),
"movq\t{$src, $dst|$dst, $src}",
[(store (i64 (vector_extract (v2i64 VR128:$src),
@@ -3018,10 +4324,9 @@ def MOVPQI2QImr : PDI<0xD6, MRMDestMem, (outs), (ins i64mem:$dst, VR128:$src),
[(store (i64 (vector_extract (v2i64 VR128:$src),
(iPTR 0))), addr:$dst)]>;
-def : Pat<(f64 (vector_extract (v2f64 VR128:$src), (iPTR 0))),
- (f64 (EXTRACT_SUBREG (v2f64 VR128:$src), sub_sd))>;
-
+//===---------------------------------------------------------------------===//
// Store / copy lower 64-bits of a XMM register.
+//
def VMOVLQ128mr : VPDI<0xD6, MRMDestMem, (outs), (ins i64mem:$dst, VR128:$src),
"movq\t{$src, $dst|$dst, $src}",
[(int_x86_sse2_storel_dq addr:$dst, VR128:$src)]>, VEX;
@@ -3037,7 +4342,7 @@ def VMOVZQI2PQIrm : I<0x7E, MRMSrcMem, (outs VR128:$dst), (ins i64mem:$src),
(loadi64 addr:$src))))))]>,
XS, VEX, Requires<[HasAVX]>;
-let AddedComplexity = 20 in {
+let AddedComplexity = 20 in
def MOVZQI2PQIrm : I<0x7E, MRMSrcMem, (outs VR128:$dst), (ins i64mem:$src),
"movq\t{$src, $dst|$dst, $src}",
[(set VR128:$dst,
@@ -3045,15 +4350,27 @@ def MOVZQI2PQIrm : I<0x7E, MRMSrcMem, (outs VR128:$dst), (ins i64mem:$src),
(loadi64 addr:$src))))))]>,
XS, Requires<[HasSSE2]>;
-def : Pat<(v2i64 (X86vzmovl (loadv2i64 addr:$src))),
+let Predicates = [HasSSE2], AddedComplexity = 20 in {
+ def : Pat<(v2i64 (X86vzmovl (loadv2i64 addr:$src))),
(MOVZQI2PQIrm addr:$src)>;
-def : Pat<(v2i64 (X86vzmovl (bc_v2i64 (loadv4f32 addr:$src)))),
+ def : Pat<(v2i64 (X86vzmovl (bc_v2i64 (loadv4f32 addr:$src)))),
(MOVZQI2PQIrm addr:$src)>;
-def : Pat<(v2i64 (X86vzload addr:$src)), (MOVZQI2PQIrm addr:$src)>;
+ def : Pat<(v2i64 (X86vzload addr:$src)), (MOVZQI2PQIrm addr:$src)>;
+}
+
+let Predicates = [HasAVX], AddedComplexity = 20 in {
+ def : Pat<(v2i64 (X86vzmovl (loadv2i64 addr:$src))),
+ (VMOVZQI2PQIrm addr:$src)>;
+ def : Pat<(v2i64 (X86vzmovl (bc_v2i64 (loadv4f32 addr:$src)))),
+ (VMOVZQI2PQIrm addr:$src)>;
+ def : Pat<(v2i64 (X86vzload addr:$src)),
+ (VMOVZQI2PQIrm addr:$src)>;
}
+//===---------------------------------------------------------------------===//
// Moving from XMM to XMM and clear upper 64 bits. Note, there is a bug in
// IA32 document. movq xmm1, xmm2 does clear the high bits.
+//
let AddedComplexity = 15 in
def VMOVZPQILo2PQIrr : I<0x7E, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
"vmovq\t{$src, $dst|$dst, $src}",
@@ -3077,9 +4394,21 @@ def MOVZPQILo2PQIrm : I<0x7E, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src),
[(set VR128:$dst, (v2i64 (X86vzmovl
(loadv2i64 addr:$src))))]>,
XS, Requires<[HasSSE2]>;
+}
-def : Pat<(v2i64 (X86vzmovl (bc_v2i64 (loadv4i32 addr:$src)))),
- (MOVZPQILo2PQIrm addr:$src)>;
+let AddedComplexity = 20 in {
+ let Predicates = [HasSSE2] in {
+ def : Pat<(v2i64 (X86vzmovl (bc_v2i64 (loadv4i32 addr:$src)))),
+ (MOVZPQILo2PQIrm addr:$src)>;
+ def : Pat<(v2f64 (X86vzmovl (v2f64 VR128:$src))),
+ (MOVZPQILo2PQIrr VR128:$src)>;
+ }
+ let Predicates = [HasAVX] in {
+ def : Pat<(v2i64 (X86vzmovl (bc_v2i64 (loadv4i32 addr:$src)))),
+ (VMOVZPQILo2PQIrm addr:$src)>;
+ def : Pat<(v2f64 (X86vzmovl (v2f64 VR128:$src))),
+ (VMOVZPQILo2PQIrr VR128:$src)>;
+ }
}
// Instructions to match in the assembler
@@ -3102,37 +4431,6 @@ def MOVQxrxr : I<0x7E, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
"movq\t{$src, $dst|$dst, $src}", []>, XS;
//===---------------------------------------------------------------------===//
-// SSE2 - Misc Instructions
-//===---------------------------------------------------------------------===//
-
-// Flush cache
-def CLFLUSH : I<0xAE, MRM7m, (outs), (ins i8mem:$src),
- "clflush\t$src", [(int_x86_sse2_clflush addr:$src)]>,
- TB, Requires<[HasSSE2]>;
-
-// Load, store, and memory fence
-def LFENCE : I<0xAE, MRM_E8, (outs), (ins),
- "lfence", [(int_x86_sse2_lfence)]>, TB, Requires<[HasSSE2]>;
-def MFENCE : I<0xAE, MRM_F0, (outs), (ins),
- "mfence", [(int_x86_sse2_mfence)]>, TB, Requires<[HasSSE2]>;
-def : Pat<(X86LFence), (LFENCE)>;
-def : Pat<(X86MFence), (MFENCE)>;
-
-
-// Pause. This "instruction" is encoded as "rep; nop", so even though it
-// was introduced with SSE2, it's backward compatible.
-def PAUSE : I<0x90, RawFrm, (outs), (ins), "pause", []>, REP;
-
-// Alias instructions that map zero vector to pxor / xorp* for sse.
-// We set canFoldAsLoad because this can be converted to a constant-pool
-// load of an all-ones value if folding it would be beneficial.
-let isReMaterializable = 1, isAsCheapAsAMove = 1, canFoldAsLoad = 1,
- isCodeGenOnly = 1, ExeDomain = SSEPackedInt in
- // FIXME: Change encoding to pseudo.
- def V_SETALLONES : PDI<0x76, MRMInitReg, (outs VR128:$dst), (ins), "",
- [(set VR128:$dst, (v4i32 immAllOnesV))]>;
-
-//===---------------------------------------------------------------------===//
// SSE3 - Conversion Instructions
//===---------------------------------------------------------------------===//
@@ -3164,6 +4462,11 @@ def CVTPD2DQrm : S3DI<0xE6, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
def CVTPD2DQrr : S3DI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
"cvtpd2dq\t{$src, $dst|$dst, $src}", []>;
+def : Pat<(v4i32 (fp_to_sint (v4f64 VR256:$src))),
+ (VCVTPD2DQYrr VR256:$src)>;
+def : Pat<(v4i32 (fp_to_sint (memopv4f64 addr:$src))),
+ (VCVTPD2DQYrm addr:$src)>;
+
// Convert Packed DW Integers to Packed Double FP
let Predicates = [HasAVX] in {
def VCVTDQ2PDrm : S3SI<0xE6, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
@@ -3192,41 +4495,74 @@ def : Pat<(int_x86_avx_cvt_pd2dq_256 VR256:$src),
def : Pat<(int_x86_avx_cvt_pd2dq_256 (memopv4f64 addr:$src)),
(VCVTPD2DQYrm addr:$src)>;
+def : Pat<(v4f64 (sint_to_fp (v4i32 VR128:$src))),
+ (VCVTDQ2PDYrr VR128:$src)>;
+def : Pat<(v4f64 (sint_to_fp (memopv4i32 addr:$src))),
+ (VCVTDQ2PDYrm addr:$src)>;
+
//===---------------------------------------------------------------------===//
-// SSE3 - Move Instructions
+// SSE3 - Replicate Single FP - MOVSHDUP and MOVSLDUP
//===---------------------------------------------------------------------===//
-
-// Replicate Single FP
-multiclass sse3_replicate_sfp<bits<8> op, PatFrag rep_frag, string OpcodeStr> {
-def rr : S3SI<op, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
+multiclass sse3_replicate_sfp<bits<8> op, SDNode OpNode, string OpcodeStr,
+ ValueType vt, RegisterClass RC, PatFrag mem_frag,
+ X86MemOperand x86memop> {
+def rr : S3SI<op, MRMSrcReg, (outs RC:$dst), (ins RC:$src),
!strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
- [(set VR128:$dst, (v4f32 (rep_frag
- VR128:$src, (undef))))]>;
-def rm : S3SI<op, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
+ [(set RC:$dst, (vt (OpNode RC:$src)))]>;
+def rm : S3SI<op, MRMSrcMem, (outs RC:$dst), (ins x86memop:$src),
!strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
- [(set VR128:$dst, (rep_frag
- (memopv4f32 addr:$src), (undef)))]>;
+ [(set RC:$dst, (OpNode (mem_frag addr:$src)))]>;
}
-multiclass sse3_replicate_sfp_y<bits<8> op, PatFrag rep_frag,
- string OpcodeStr> {
-def rr : S3SI<op, MRMSrcReg, (outs VR256:$dst), (ins VR256:$src),
- !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), []>;
-def rm : S3SI<op, MRMSrcMem, (outs VR256:$dst), (ins f256mem:$src),
- !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), []>;
+let Predicates = [HasAVX] in {
+ defm VMOVSHDUP : sse3_replicate_sfp<0x16, X86Movshdup, "vmovshdup",
+ v4f32, VR128, memopv4f32, f128mem>, VEX;
+ defm VMOVSLDUP : sse3_replicate_sfp<0x12, X86Movsldup, "vmovsldup",
+ v4f32, VR128, memopv4f32, f128mem>, VEX;
+ defm VMOVSHDUPY : sse3_replicate_sfp<0x16, X86Movshdup, "vmovshdup",
+ v8f32, VR256, memopv8f32, f256mem>, VEX;
+ defm VMOVSLDUPY : sse3_replicate_sfp<0x12, X86Movsldup, "vmovsldup",
+ v8f32, VR256, memopv8f32, f256mem>, VEX;
+}
+defm MOVSHDUP : sse3_replicate_sfp<0x16, X86Movshdup, "movshdup", v4f32, VR128,
+ memopv4f32, f128mem>;
+defm MOVSLDUP : sse3_replicate_sfp<0x12, X86Movsldup, "movsldup", v4f32, VR128,
+ memopv4f32, f128mem>;
+
+let Predicates = [HasSSE3] in {
+ def : Pat<(v4i32 (X86Movshdup VR128:$src)),
+ (MOVSHDUPrr VR128:$src)>;
+ def : Pat<(v4i32 (X86Movshdup (bc_v4i32 (memopv2i64 addr:$src)))),
+ (MOVSHDUPrm addr:$src)>;
+ def : Pat<(v4i32 (X86Movsldup VR128:$src)),
+ (MOVSLDUPrr VR128:$src)>;
+ def : Pat<(v4i32 (X86Movsldup (bc_v4i32 (memopv2i64 addr:$src)))),
+ (MOVSLDUPrm addr:$src)>;
}
let Predicates = [HasAVX] in {
- // FIXME: Merge above classes when we have patterns for the ymm version
- defm VMOVSHDUP : sse3_replicate_sfp<0x16, movshdup, "vmovshdup">, VEX;
- defm VMOVSLDUP : sse3_replicate_sfp<0x12, movsldup, "vmovsldup">, VEX;
- defm VMOVSHDUPY : sse3_replicate_sfp_y<0x16, movshdup, "vmovshdup">, VEX;
- defm VMOVSLDUPY : sse3_replicate_sfp_y<0x12, movsldup, "vmovsldup">, VEX;
+ def : Pat<(v4i32 (X86Movshdup VR128:$src)),
+ (VMOVSHDUPrr VR128:$src)>;
+ def : Pat<(v4i32 (X86Movshdup (bc_v4i32 (memopv2i64 addr:$src)))),
+ (VMOVSHDUPrm addr:$src)>;
+ def : Pat<(v4i32 (X86Movsldup VR128:$src)),
+ (VMOVSLDUPrr VR128:$src)>;
+ def : Pat<(v4i32 (X86Movsldup (bc_v4i32 (memopv2i64 addr:$src)))),
+ (VMOVSLDUPrm addr:$src)>;
+ def : Pat<(v8i32 (X86Movshdup VR256:$src)),
+ (VMOVSHDUPYrr VR256:$src)>;
+ def : Pat<(v8i32 (X86Movshdup (bc_v8i32 (memopv4i64 addr:$src)))),
+ (VMOVSHDUPYrm addr:$src)>;
+ def : Pat<(v8i32 (X86Movsldup VR256:$src)),
+ (VMOVSLDUPYrr VR256:$src)>;
+ def : Pat<(v8i32 (X86Movsldup (bc_v8i32 (memopv4i64 addr:$src)))),
+ (VMOVSLDUPYrm addr:$src)>;
}
-defm MOVSHDUP : sse3_replicate_sfp<0x16, movshdup, "movshdup">;
-defm MOVSLDUP : sse3_replicate_sfp<0x12, movsldup, "movsldup">;
-// Replicate Double FP
+//===---------------------------------------------------------------------===//
+// SSE3 - Replicate Double FP - MOVDDUP
+//===---------------------------------------------------------------------===//
+
multiclass sse3_replicate_dfp<string OpcodeStr> {
def rr : S3DI<0x12, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
!strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
@@ -3238,23 +4574,90 @@ def rm : S3DI<0x12, MRMSrcMem, (outs VR128:$dst), (ins f64mem:$src),
(undef))))]>;
}
+// FIXME: Merge with above classe when there're patterns for the ymm version
multiclass sse3_replicate_dfp_y<string OpcodeStr> {
-def rr : S3DI<0x12, MRMSrcReg, (outs VR256:$dst), (ins VR256:$src),
- !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
- []>;
-def rm : S3DI<0x12, MRMSrcMem, (outs VR256:$dst), (ins f256mem:$src),
- !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
- []>;
+let Predicates = [HasAVX] in {
+ def rr : S3DI<0x12, MRMSrcReg, (outs VR256:$dst), (ins VR256:$src),
+ !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
+ []>;
+ def rm : S3DI<0x12, MRMSrcMem, (outs VR256:$dst), (ins f256mem:$src),
+ !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
+ []>;
+ }
+}
+
+defm MOVDDUP : sse3_replicate_dfp<"movddup">;
+defm VMOVDDUP : sse3_replicate_dfp<"vmovddup">, VEX;
+defm VMOVDDUPY : sse3_replicate_dfp_y<"vmovddup">, VEX;
+
+let Predicates = [HasSSE3] in {
+ def : Pat<(movddup (bc_v2f64 (v2i64 (scalar_to_vector (loadi64 addr:$src)))),
+ (undef)),
+ (MOVDDUPrm addr:$src)>;
+ let AddedComplexity = 5 in {
+ def : Pat<(movddup (memopv2f64 addr:$src), (undef)), (MOVDDUPrm addr:$src)>;
+ def : Pat<(movddup (bc_v4f32 (memopv2f64 addr:$src)), (undef)),
+ (MOVDDUPrm addr:$src)>;
+ def : Pat<(movddup (memopv2i64 addr:$src), (undef)), (MOVDDUPrm addr:$src)>;
+ def : Pat<(movddup (bc_v4i32 (memopv2i64 addr:$src)), (undef)),
+ (MOVDDUPrm addr:$src)>;
+ }
+ def : Pat<(X86Movddup (memopv2f64 addr:$src)),
+ (MOVDDUPrm addr:$src)>;
+ def : Pat<(X86Movddup (bc_v2f64 (memopv4f32 addr:$src))),
+ (MOVDDUPrm addr:$src)>;
+ def : Pat<(X86Movddup (bc_v2f64 (memopv2i64 addr:$src))),
+ (MOVDDUPrm addr:$src)>;
+ def : Pat<(X86Movddup (v2f64 (scalar_to_vector (loadf64 addr:$src)))),
+ (MOVDDUPrm addr:$src)>;
+ def : Pat<(X86Movddup (bc_v2f64
+ (v2i64 (scalar_to_vector (loadi64 addr:$src))))),
+ (MOVDDUPrm addr:$src)>;
}
let Predicates = [HasAVX] in {
- // FIXME: Merge above classes when we have patterns for the ymm version
- defm VMOVDDUP : sse3_replicate_dfp<"vmovddup">, VEX;
- defm VMOVDDUPY : sse3_replicate_dfp_y<"vmovddup">, VEX;
+ def : Pat<(movddup (bc_v2f64 (v2i64 (scalar_to_vector (loadi64 addr:$src)))),
+ (undef)),
+ (VMOVDDUPrm addr:$src)>;
+ let AddedComplexity = 5 in {
+ def : Pat<(movddup (memopv2f64 addr:$src), (undef)), (VMOVDDUPrm addr:$src)>;
+ def : Pat<(movddup (bc_v4f32 (memopv2f64 addr:$src)), (undef)),
+ (VMOVDDUPrm addr:$src)>;
+ def : Pat<(movddup (memopv2i64 addr:$src), (undef)), (VMOVDDUPrm addr:$src)>;
+ def : Pat<(movddup (bc_v4i32 (memopv2i64 addr:$src)), (undef)),
+ (VMOVDDUPrm addr:$src)>;
+ }
+ def : Pat<(X86Movddup (memopv2f64 addr:$src)),
+ (VMOVDDUPrm addr:$src)>, Requires<[HasAVX]>;
+ def : Pat<(X86Movddup (bc_v2f64 (memopv4f32 addr:$src))),
+ (VMOVDDUPrm addr:$src)>, Requires<[HasAVX]>;
+ def : Pat<(X86Movddup (bc_v2f64 (memopv2i64 addr:$src))),
+ (VMOVDDUPrm addr:$src)>, Requires<[HasAVX]>;
+ def : Pat<(X86Movddup (v2f64 (scalar_to_vector (loadf64 addr:$src)))),
+ (VMOVDDUPrm addr:$src)>, Requires<[HasAVX]>;
+ def : Pat<(X86Movddup (bc_v2f64
+ (v2i64 (scalar_to_vector (loadi64 addr:$src))))),
+ (VMOVDDUPrm addr:$src)>, Requires<[HasAVX]>;
+
+ // 256-bit version
+ def : Pat<(X86Movddup (memopv4f64 addr:$src)),
+ (VMOVDDUPYrm addr:$src)>;
+ def : Pat<(X86Movddup (memopv4i64 addr:$src)),
+ (VMOVDDUPYrm addr:$src)>;
+ def : Pat<(X86Movddup (v4f64 (scalar_to_vector (loadf64 addr:$src)))),
+ (VMOVDDUPYrm addr:$src)>;
+ def : Pat<(X86Movddup (v4i64 (scalar_to_vector (loadi64 addr:$src)))),
+ (VMOVDDUPYrm addr:$src)>;
+ def : Pat<(X86Movddup (v4f64 VR256:$src)),
+ (VMOVDDUPYrr VR256:$src)>;
+ def : Pat<(X86Movddup (v4i64 VR256:$src)),
+ (VMOVDDUPYrr VR256:$src)>;
}
-defm MOVDDUP : sse3_replicate_dfp<"movddup">;
-// Move Unaligned Integer
+//===---------------------------------------------------------------------===//
+// SSE3 - Move Unaligned Integer
+//===---------------------------------------------------------------------===//
+
let Predicates = [HasAVX] in {
def VLDDQUrm : S3DI<0xF0, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src),
"vlddqu\t{$src, $dst|$dst, $src}",
@@ -3267,38 +4670,6 @@ def LDDQUrm : S3DI<0xF0, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src),
"lddqu\t{$src, $dst|$dst, $src}",
[(set VR128:$dst, (int_x86_sse3_ldu_dq addr:$src))]>;
-def : Pat<(movddup (bc_v2f64 (v2i64 (scalar_to_vector (loadi64 addr:$src)))),
- (undef)),
- (MOVDDUPrm addr:$src)>, Requires<[HasSSE3]>;
-
-// Several Move patterns
-let AddedComplexity = 5 in {
-def : Pat<(movddup (memopv2f64 addr:$src), (undef)),
- (MOVDDUPrm addr:$src)>, Requires<[HasSSE3]>;
-def : Pat<(movddup (bc_v4f32 (memopv2f64 addr:$src)), (undef)),
- (MOVDDUPrm addr:$src)>, Requires<[HasSSE3]>;
-def : Pat<(movddup (memopv2i64 addr:$src), (undef)),
- (MOVDDUPrm addr:$src)>, Requires<[HasSSE3]>;
-def : Pat<(movddup (bc_v4i32 (memopv2i64 addr:$src)), (undef)),
- (MOVDDUPrm addr:$src)>, Requires<[HasSSE3]>;
-}
-
-// vector_shuffle v1, <undef> <1, 1, 3, 3>
-let AddedComplexity = 15 in
-def : Pat<(v4i32 (movshdup VR128:$src, (undef))),
- (MOVSHDUPrr VR128:$src)>, Requires<[HasSSE3]>;
-let AddedComplexity = 20 in
-def : Pat<(v4i32 (movshdup (bc_v4i32 (memopv2i64 addr:$src)), (undef))),
- (MOVSHDUPrm addr:$src)>, Requires<[HasSSE3]>;
-
-// vector_shuffle v1, <undef> <0, 0, 2, 2>
-let AddedComplexity = 15 in
- def : Pat<(v4i32 (movsldup VR128:$src, (undef))),
- (MOVSLDUPrr VR128:$src)>, Requires<[HasSSE3]>;
-let AddedComplexity = 20 in
- def : Pat<(v4i32 (movsldup (bc_v4i32 (memopv2i64 addr:$src)), (undef))),
- (MOVSLDUPrm addr:$src)>, Requires<[HasSSE3]>;
-
//===---------------------------------------------------------------------===//
// SSE3 - Arithmetic
//===---------------------------------------------------------------------===//
@@ -3344,62 +4715,58 @@ let Constraints = "$src1 = $dst", Predicates = [HasSSE3],
// Horizontal ops
multiclass S3D_Int<bits<8> o, string OpcodeStr, ValueType vt, RegisterClass RC,
- X86MemOperand x86memop, Intrinsic IntId, bit Is2Addr = 1> {
+ X86MemOperand x86memop, SDNode OpNode, bit Is2Addr = 1> {
def rr : S3DI<o, MRMSrcReg, (outs RC:$dst), (ins RC:$src1, RC:$src2),
!if(Is2Addr,
!strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
!strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
- [(set RC:$dst, (vt (IntId RC:$src1, RC:$src2)))]>;
+ [(set RC:$dst, (vt (OpNode RC:$src1, RC:$src2)))]>;
def rm : S3DI<o, MRMSrcMem, (outs RC:$dst), (ins RC:$src1, x86memop:$src2),
!if(Is2Addr,
!strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
!strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
- [(set RC:$dst, (vt (IntId RC:$src1, (memop addr:$src2))))]>;
+ [(set RC:$dst, (vt (OpNode RC:$src1, (memop addr:$src2))))]>;
}
multiclass S3_Int<bits<8> o, string OpcodeStr, ValueType vt, RegisterClass RC,
- X86MemOperand x86memop, Intrinsic IntId, bit Is2Addr = 1> {
+ X86MemOperand x86memop, SDNode OpNode, bit Is2Addr = 1> {
def rr : S3I<o, MRMSrcReg, (outs RC:$dst), (ins RC:$src1, RC:$src2),
!if(Is2Addr,
!strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
!strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
- [(set RC:$dst, (vt (IntId RC:$src1, RC:$src2)))]>;
+ [(set RC:$dst, (vt (OpNode RC:$src1, RC:$src2)))]>;
def rm : S3I<o, MRMSrcMem, (outs RC:$dst), (ins RC:$src1, x86memop:$src2),
!if(Is2Addr,
!strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
!strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
- [(set RC:$dst, (vt (IntId RC:$src1, (memop addr:$src2))))]>;
+ [(set RC:$dst, (vt (OpNode RC:$src1, (memop addr:$src2))))]>;
}
let Predicates = [HasAVX] in {
defm VHADDPS : S3D_Int<0x7C, "vhaddps", v4f32, VR128, f128mem,
- int_x86_sse3_hadd_ps, 0>, VEX_4V;
+ X86fhadd, 0>, VEX_4V;
defm VHADDPD : S3_Int <0x7C, "vhaddpd", v2f64, VR128, f128mem,
- int_x86_sse3_hadd_pd, 0>, VEX_4V;
+ X86fhadd, 0>, VEX_4V;
defm VHSUBPS : S3D_Int<0x7D, "vhsubps", v4f32, VR128, f128mem,
- int_x86_sse3_hsub_ps, 0>, VEX_4V;
+ X86fhsub, 0>, VEX_4V;
defm VHSUBPD : S3_Int <0x7D, "vhsubpd", v2f64, VR128, f128mem,
- int_x86_sse3_hsub_pd, 0>, VEX_4V;
+ X86fhsub, 0>, VEX_4V;
defm VHADDPSY : S3D_Int<0x7C, "vhaddps", v8f32, VR256, f256mem,
- int_x86_avx_hadd_ps_256, 0>, VEX_4V;
+ X86fhadd, 0>, VEX_4V;
defm VHADDPDY : S3_Int <0x7C, "vhaddpd", v4f64, VR256, f256mem,
- int_x86_avx_hadd_pd_256, 0>, VEX_4V;
+ X86fhadd, 0>, VEX_4V;
defm VHSUBPSY : S3D_Int<0x7D, "vhsubps", v8f32, VR256, f256mem,
- int_x86_avx_hsub_ps_256, 0>, VEX_4V;
+ X86fhsub, 0>, VEX_4V;
defm VHSUBPDY : S3_Int <0x7D, "vhsubpd", v4f64, VR256, f256mem,
- int_x86_avx_hsub_pd_256, 0>, VEX_4V;
+ X86fhsub, 0>, VEX_4V;
}
let Constraints = "$src1 = $dst" in {
- defm HADDPS : S3D_Int<0x7C, "haddps", v4f32, VR128, f128mem,
- int_x86_sse3_hadd_ps>;
- defm HADDPD : S3_Int<0x7C, "haddpd", v2f64, VR128, f128mem,
- int_x86_sse3_hadd_pd>;
- defm HSUBPS : S3D_Int<0x7D, "hsubps", v4f32, VR128, f128mem,
- int_x86_sse3_hsub_ps>;
- defm HSUBPD : S3_Int<0x7D, "hsubpd", v2f64, VR128, f128mem,
- int_x86_sse3_hsub_pd>;
+ defm HADDPS : S3D_Int<0x7C, "haddps", v4f32, VR128, f128mem, X86fhadd>;
+ defm HADDPD : S3_Int<0x7C, "haddpd", v2f64, VR128, f128mem, X86fhadd>;
+ defm HSUBPS : S3D_Int<0x7D, "hsubps", v4f32, VR128, f128mem, X86fhsub>;
+ defm HSUBPD : S3_Int<0x7D, "hsubpd", v2f64, VR128, f128mem, X86fhsub>;
}
//===---------------------------------------------------------------------===//
@@ -3466,7 +4833,7 @@ multiclass SS3I_binop_rm_int<bits<8> opc, string OpcodeStr,
(bitconvert (memopv16i8 addr:$src2))))]>, OpSize;
}
-let Predicates = [HasAVX] in {
+let ImmT = NoImm, Predicates = [HasAVX] in {
let isCommutable = 0 in {
defm VPHADDW : SS3I_binop_rm_int<0x01, "vphaddw", memopv8i16,
int_x86_ssse3_phadd_w_128, 0>, VEX_4V;
@@ -3525,17 +4892,33 @@ defm PMULHRSW : SS3I_binop_rm_int<0x0B, "pmulhrsw", memopv8i16,
int_x86_ssse3_pmul_hr_sw_128>;
}
-def : Pat<(X86pshufb VR128:$src, VR128:$mask),
- (PSHUFBrr128 VR128:$src, VR128:$mask)>, Requires<[HasSSSE3]>;
-def : Pat<(X86pshufb VR128:$src, (bc_v16i8 (memopv2i64 addr:$mask))),
- (PSHUFBrm128 VR128:$src, addr:$mask)>, Requires<[HasSSSE3]>;
+let Predicates = [HasSSSE3] in {
+ def : Pat<(X86pshufb VR128:$src, VR128:$mask),
+ (PSHUFBrr128 VR128:$src, VR128:$mask)>;
+ def : Pat<(X86pshufb VR128:$src, (bc_v16i8 (memopv2i64 addr:$mask))),
+ (PSHUFBrm128 VR128:$src, addr:$mask)>;
+
+ def : Pat<(X86psignb VR128:$src1, VR128:$src2),
+ (PSIGNBrr128 VR128:$src1, VR128:$src2)>;
+ def : Pat<(X86psignw VR128:$src1, VR128:$src2),
+ (PSIGNWrr128 VR128:$src1, VR128:$src2)>;
+ def : Pat<(X86psignd VR128:$src1, VR128:$src2),
+ (PSIGNDrr128 VR128:$src1, VR128:$src2)>;
+}
+
+let Predicates = [HasAVX] in {
+ def : Pat<(X86pshufb VR128:$src, VR128:$mask),
+ (VPSHUFBrr128 VR128:$src, VR128:$mask)>;
+ def : Pat<(X86pshufb VR128:$src, (bc_v16i8 (memopv2i64 addr:$mask))),
+ (VPSHUFBrm128 VR128:$src, addr:$mask)>;
-def : Pat<(X86psignb VR128:$src1, VR128:$src2),
- (PSIGNBrr128 VR128:$src1, VR128:$src2)>, Requires<[HasSSSE3]>;
-def : Pat<(X86psignw VR128:$src1, VR128:$src2),
- (PSIGNWrr128 VR128:$src1, VR128:$src2)>, Requires<[HasSSSE3]>;
-def : Pat<(X86psignd VR128:$src1, VR128:$src2),
- (PSIGNDrr128 VR128:$src1, VR128:$src2)>, Requires<[HasSSSE3]>;
+ def : Pat<(X86psignb VR128:$src1, VR128:$src2),
+ (VPSIGNBrr128 VR128:$src1, VR128:$src2)>;
+ def : Pat<(X86psignw VR128:$src1, VR128:$src2),
+ (VPSIGNWrr128 VR128:$src1, VR128:$src2)>;
+ def : Pat<(X86psignd VR128:$src1, VR128:$src2),
+ (VPSIGNDrr128 VR128:$src1, VR128:$src2)>;
+}
//===---------------------------------------------------------------------===//
// SSSE3 - Packed Align Instruction Patterns
@@ -3560,33 +4943,35 @@ multiclass ssse3_palign<string asm, bit Is2Addr = 1> {
let Predicates = [HasAVX] in
defm VPALIGN : ssse3_palign<"vpalignr", 0>, VEX_4V;
-let Constraints = "$src1 = $dst" in
+let Constraints = "$src1 = $dst", Predicates = [HasSSSE3] in
defm PALIGN : ssse3_palign<"palignr">;
-let AddedComplexity = 5 in {
-def : Pat<(v4i32 (palign:$src3 VR128:$src1, VR128:$src2)),
- (PALIGNR128rr VR128:$src2, VR128:$src1,
- (SHUFFLE_get_palign_imm VR128:$src3))>,
- Requires<[HasSSSE3]>;
-def : Pat<(v4f32 (palign:$src3 VR128:$src1, VR128:$src2)),
- (PALIGNR128rr VR128:$src2, VR128:$src1,
- (SHUFFLE_get_palign_imm VR128:$src3))>,
- Requires<[HasSSSE3]>;
-def : Pat<(v8i16 (palign:$src3 VR128:$src1, VR128:$src2)),
- (PALIGNR128rr VR128:$src2, VR128:$src1,
- (SHUFFLE_get_palign_imm VR128:$src3))>,
- Requires<[HasSSSE3]>;
-def : Pat<(v16i8 (palign:$src3 VR128:$src1, VR128:$src2)),
- (PALIGNR128rr VR128:$src2, VR128:$src1,
- (SHUFFLE_get_palign_imm VR128:$src3))>,
- Requires<[HasSSSE3]>;
+let Predicates = [HasSSSE3] in {
+def : Pat<(v4i32 (X86PAlign VR128:$src1, VR128:$src2, (i8 imm:$imm))),
+ (PALIGNR128rr VR128:$src2, VR128:$src1, imm:$imm)>;
+def : Pat<(v4f32 (X86PAlign VR128:$src1, VR128:$src2, (i8 imm:$imm))),
+ (PALIGNR128rr VR128:$src2, VR128:$src1, imm:$imm)>;
+def : Pat<(v8i16 (X86PAlign VR128:$src1, VR128:$src2, (i8 imm:$imm))),
+ (PALIGNR128rr VR128:$src2, VR128:$src1, imm:$imm)>;
+def : Pat<(v16i8 (X86PAlign VR128:$src1, VR128:$src2, (i8 imm:$imm))),
+ (PALIGNR128rr VR128:$src2, VR128:$src1, imm:$imm)>;
+}
+
+let Predicates = [HasAVX] in {
+def : Pat<(v4i32 (X86PAlign VR128:$src1, VR128:$src2, (i8 imm:$imm))),
+ (VPALIGNR128rr VR128:$src2, VR128:$src1, imm:$imm)>;
+def : Pat<(v4f32 (X86PAlign VR128:$src1, VR128:$src2, (i8 imm:$imm))),
+ (VPALIGNR128rr VR128:$src2, VR128:$src1, imm:$imm)>;
+def : Pat<(v8i16 (X86PAlign VR128:$src1, VR128:$src2, (i8 imm:$imm))),
+ (VPALIGNR128rr VR128:$src2, VR128:$src1, imm:$imm)>;
+def : Pat<(v16i8 (X86PAlign VR128:$src1, VR128:$src2, (i8 imm:$imm))),
+ (VPALIGNR128rr VR128:$src2, VR128:$src1, imm:$imm)>;
}
//===---------------------------------------------------------------------===//
-// SSSE3 Misc Instructions
+// SSSE3 - Thread synchronization
//===---------------------------------------------------------------------===//
-// Thread synchronization
let usesCustomInserter = 1 in {
def MONITOR : PseudoI<(outs), (ins i32mem:$src1, GR32:$src2, GR32:$src3),
[(int_x86_sse3_monitor addr:$src1, GR32:$src2, GR32:$src3)]>;
@@ -3609,338 +4994,6 @@ def : InstAlias<"monitor %eax, %ecx, %edx", (MONITORrrr)>,
def : InstAlias<"monitor %rax, %rcx, %rdx", (MONITORrrr)>,
Requires<[In64BitMode]>;
-//===---------------------------------------------------------------------===//
-// Non-Instruction Patterns
-//===---------------------------------------------------------------------===//
-
-// extload f32 -> f64. This matches load+fextend because we have a hack in
-// the isel (PreprocessForFPConvert) that can introduce loads after dag
-// combine.
-// Since these loads aren't folded into the fextend, we have to match it
-// explicitly here.
-let Predicates = [HasSSE2] in
- def : Pat<(fextend (loadf32 addr:$src)),
- (CVTSS2SDrm addr:$src)>;
-
-// FIXME: According to the intel manual, DEST[127:64] <- SRC1[127:64], while
-// in the non-AVX version bits 127:64 aren't touched. Find a better way to
-// represent this instead of always zeroing SRC1. One possible solution is
-// to represent the instruction w/ something similar as the "$src1 = $dst"
-// constraint but without the tied operands.
-let Predicates = [HasAVX] in
- def : Pat<(fextend (loadf32 addr:$src)),
- (VCVTSS2SDrm (f32 (EXTRACT_SUBREG (AVX_SET0PS), sub_ss)),
- addr:$src)>;
-
-// bit_convert
-let Predicates = [HasXMMInt] in {
- def : Pat<(v2i64 (bitconvert (v4i32 VR128:$src))), (v2i64 VR128:$src)>;
- def : Pat<(v2i64 (bitconvert (v8i16 VR128:$src))), (v2i64 VR128:$src)>;
- def : Pat<(v2i64 (bitconvert (v16i8 VR128:$src))), (v2i64 VR128:$src)>;
- def : Pat<(v2i64 (bitconvert (v2f64 VR128:$src))), (v2i64 VR128:$src)>;
- def : Pat<(v2i64 (bitconvert (v4f32 VR128:$src))), (v2i64 VR128:$src)>;
- def : Pat<(v4i32 (bitconvert (v2i64 VR128:$src))), (v4i32 VR128:$src)>;
- def : Pat<(v4i32 (bitconvert (v8i16 VR128:$src))), (v4i32 VR128:$src)>;
- def : Pat<(v4i32 (bitconvert (v16i8 VR128:$src))), (v4i32 VR128:$src)>;
- def : Pat<(v4i32 (bitconvert (v2f64 VR128:$src))), (v4i32 VR128:$src)>;
- def : Pat<(v4i32 (bitconvert (v4f32 VR128:$src))), (v4i32 VR128:$src)>;
- def : Pat<(v8i16 (bitconvert (v2i64 VR128:$src))), (v8i16 VR128:$src)>;
- def : Pat<(v8i16 (bitconvert (v4i32 VR128:$src))), (v8i16 VR128:$src)>;
- def : Pat<(v8i16 (bitconvert (v16i8 VR128:$src))), (v8i16 VR128:$src)>;
- def : Pat<(v8i16 (bitconvert (v2f64 VR128:$src))), (v8i16 VR128:$src)>;
- def : Pat<(v8i16 (bitconvert (v4f32 VR128:$src))), (v8i16 VR128:$src)>;
- def : Pat<(v16i8 (bitconvert (v2i64 VR128:$src))), (v16i8 VR128:$src)>;
- def : Pat<(v16i8 (bitconvert (v4i32 VR128:$src))), (v16i8 VR128:$src)>;
- def : Pat<(v16i8 (bitconvert (v8i16 VR128:$src))), (v16i8 VR128:$src)>;
- def : Pat<(v16i8 (bitconvert (v2f64 VR128:$src))), (v16i8 VR128:$src)>;
- def : Pat<(v16i8 (bitconvert (v4f32 VR128:$src))), (v16i8 VR128:$src)>;
- def : Pat<(v4f32 (bitconvert (v2i64 VR128:$src))), (v4f32 VR128:$src)>;
- def : Pat<(v4f32 (bitconvert (v4i32 VR128:$src))), (v4f32 VR128:$src)>;
- def : Pat<(v4f32 (bitconvert (v8i16 VR128:$src))), (v4f32 VR128:$src)>;
- def : Pat<(v4f32 (bitconvert (v16i8 VR128:$src))), (v4f32 VR128:$src)>;
- def : Pat<(v4f32 (bitconvert (v2f64 VR128:$src))), (v4f32 VR128:$src)>;
- def : Pat<(v2f64 (bitconvert (v2i64 VR128:$src))), (v2f64 VR128:$src)>;
- def : Pat<(v2f64 (bitconvert (v4i32 VR128:$src))), (v2f64 VR128:$src)>;
- def : Pat<(v2f64 (bitconvert (v8i16 VR128:$src))), (v2f64 VR128:$src)>;
- def : Pat<(v2f64 (bitconvert (v16i8 VR128:$src))), (v2f64 VR128:$src)>;
- def : Pat<(v2f64 (bitconvert (v4f32 VR128:$src))), (v2f64 VR128:$src)>;
-}
-
-let Predicates = [HasAVX] in {
- def : Pat<(v4f64 (bitconvert (v8f32 VR256:$src))), (v4f64 VR256:$src)>;
- def : Pat<(v4f64 (bitconvert (v4i64 VR256:$src))), (v4f64 VR256:$src)>;
- def : Pat<(v4f64 (bitconvert (v32i8 VR256:$src))), (v4f64 VR256:$src)>;
- def : Pat<(v8f32 (bitconvert (v4i64 VR256:$src))), (v8f32 VR256:$src)>;
- def : Pat<(v8f32 (bitconvert (v4f64 VR256:$src))), (v8f32 VR256:$src)>;
- def : Pat<(v8f32 (bitconvert (v32i8 VR256:$src))), (v8f32 VR256:$src)>;
- def : Pat<(v4i64 (bitconvert (v8f32 VR256:$src))), (v4i64 VR256:$src)>;
- def : Pat<(v4i64 (bitconvert (v4f64 VR256:$src))), (v4i64 VR256:$src)>;
- def : Pat<(v4i64 (bitconvert (v32i8 VR256:$src))), (v4i64 VR256:$src)>;
- def : Pat<(v32i8 (bitconvert (v4f64 VR256:$src))), (v32i8 VR256:$src)>;
- def : Pat<(v32i8 (bitconvert (v4i64 VR256:$src))), (v32i8 VR256:$src)>;
- def : Pat<(v32i8 (bitconvert (v8f32 VR256:$src))), (v32i8 VR256:$src)>;
- def : Pat<(v32i8 (bitconvert (v8i32 VR256:$src))), (v32i8 VR256:$src)>;
- def : Pat<(v8i32 (bitconvert (v32i8 VR256:$src))), (v8i32 VR256:$src)>;
-}
-
-// Move scalar to XMM zero-extended
-// movd to XMM register zero-extends
-let AddedComplexity = 15 in {
-// Zeroing a VR128 then do a MOVS{S|D} to the lower bits.
-def : Pat<(v2f64 (X86vzmovl (v2f64 (scalar_to_vector FR64:$src)))),
- (MOVSDrr (v2f64 (V_SET0PS)), FR64:$src)>;
-def : Pat<(v4f32 (X86vzmovl (v4f32 (scalar_to_vector FR32:$src)))),
- (MOVSSrr (v4f32 (V_SET0PS)), FR32:$src)>;
-def : Pat<(v4f32 (X86vzmovl (v4f32 VR128:$src))),
- (MOVSSrr (v4f32 (V_SET0PS)),
- (f32 (EXTRACT_SUBREG (v4f32 VR128:$src), sub_ss)))>;
-def : Pat<(v4i32 (X86vzmovl (v4i32 VR128:$src))),
- (MOVSSrr (v4i32 (V_SET0PI)),
- (EXTRACT_SUBREG (v4i32 VR128:$src), sub_ss))>;
-}
-
-// Splat v2f64 / v2i64
-let AddedComplexity = 10 in {
-def : Pat<(splat_lo (v2f64 VR128:$src), (undef)),
- (UNPCKLPDrr VR128:$src, VR128:$src)>, Requires<[HasSSE2]>;
-def : Pat<(unpckh (v2f64 VR128:$src), (undef)),
- (UNPCKHPDrr VR128:$src, VR128:$src)>, Requires<[HasSSE2]>;
-def : Pat<(splat_lo (v2i64 VR128:$src), (undef)),
- (PUNPCKLQDQrr VR128:$src, VR128:$src)>, Requires<[HasSSE2]>;
-def : Pat<(unpckh (v2i64 VR128:$src), (undef)),
- (PUNPCKHQDQrr VR128:$src, VR128:$src)>, Requires<[HasSSE2]>;
-}
-
-// Special unary SHUFPSrri case.
-def : Pat<(v4f32 (pshufd:$src3 VR128:$src1, (undef))),
- (SHUFPSrri VR128:$src1, VR128:$src1,
- (SHUFFLE_get_shuf_imm VR128:$src3))>;
-let AddedComplexity = 5 in
-def : Pat<(v4f32 (pshufd:$src2 VR128:$src1, (undef))),
- (PSHUFDri VR128:$src1, (SHUFFLE_get_shuf_imm VR128:$src2))>,
- Requires<[HasSSE2]>;
-// Special unary SHUFPDrri case.
-def : Pat<(v2i64 (pshufd:$src3 VR128:$src1, (undef))),
- (SHUFPDrri VR128:$src1, VR128:$src1,
- (SHUFFLE_get_shuf_imm VR128:$src3))>,
- Requires<[HasSSE2]>;
-// Special unary SHUFPDrri case.
-def : Pat<(v2f64 (pshufd:$src3 VR128:$src1, (undef))),
- (SHUFPDrri VR128:$src1, VR128:$src1,
- (SHUFFLE_get_shuf_imm VR128:$src3))>,
- Requires<[HasSSE2]>;
-// Unary v4f32 shuffle with PSHUF* in order to fold a load.
-def : Pat<(pshufd:$src2 (bc_v4i32 (memopv4f32 addr:$src1)), (undef)),
- (PSHUFDmi addr:$src1, (SHUFFLE_get_shuf_imm VR128:$src2))>,
- Requires<[HasSSE2]>;
-
-// Special binary v4i32 shuffle cases with SHUFPS.
-def : Pat<(v4i32 (shufp:$src3 VR128:$src1, (v4i32 VR128:$src2))),
- (SHUFPSrri VR128:$src1, VR128:$src2,
- (SHUFFLE_get_shuf_imm VR128:$src3))>,
- Requires<[HasSSE2]>;
-def : Pat<(v4i32 (shufp:$src3 VR128:$src1, (bc_v4i32 (memopv2i64 addr:$src2)))),
- (SHUFPSrmi VR128:$src1, addr:$src2,
- (SHUFFLE_get_shuf_imm VR128:$src3))>,
- Requires<[HasSSE2]>;
-// Special binary v2i64 shuffle cases using SHUFPDrri.
-def : Pat<(v2i64 (shufp:$src3 VR128:$src1, VR128:$src2)),
- (SHUFPDrri VR128:$src1, VR128:$src2,
- (SHUFFLE_get_shuf_imm VR128:$src3))>,
- Requires<[HasSSE2]>;
-
-// vector_shuffle v1, <undef>, <0, 0, 1, 1, ...>
-let AddedComplexity = 15 in {
-def : Pat<(v4i32 (unpckl_undef:$src2 VR128:$src, (undef))),
- (PSHUFDri VR128:$src, (SHUFFLE_get_shuf_imm VR128:$src2))>,
- Requires<[OptForSpeed, HasSSE2]>;
-def : Pat<(v4f32 (unpckl_undef:$src2 VR128:$src, (undef))),
- (PSHUFDri VR128:$src, (SHUFFLE_get_shuf_imm VR128:$src2))>,
- Requires<[OptForSpeed, HasSSE2]>;
-}
-let AddedComplexity = 10 in {
-def : Pat<(v4f32 (unpckl_undef VR128:$src, (undef))),
- (UNPCKLPSrr VR128:$src, VR128:$src)>;
-def : Pat<(v16i8 (unpckl_undef VR128:$src, (undef))),
- (PUNPCKLBWrr VR128:$src, VR128:$src)>;
-def : Pat<(v8i16 (unpckl_undef VR128:$src, (undef))),
- (PUNPCKLWDrr VR128:$src, VR128:$src)>;
-def : Pat<(v4i32 (unpckl_undef VR128:$src, (undef))),
- (PUNPCKLDQrr VR128:$src, VR128:$src)>;
-}
-
-// vector_shuffle v1, <undef>, <2, 2, 3, 3, ...>
-let AddedComplexity = 15 in {
-def : Pat<(v4i32 (unpckh_undef:$src2 VR128:$src, (undef))),
- (PSHUFDri VR128:$src, (SHUFFLE_get_shuf_imm VR128:$src2))>,
- Requires<[OptForSpeed, HasSSE2]>;
-def : Pat<(v4f32 (unpckh_undef:$src2 VR128:$src, (undef))),
- (PSHUFDri VR128:$src, (SHUFFLE_get_shuf_imm VR128:$src2))>,
- Requires<[OptForSpeed, HasSSE2]>;
-}
-let AddedComplexity = 10 in {
-def : Pat<(v4f32 (unpckh_undef VR128:$src, (undef))),
- (UNPCKHPSrr VR128:$src, VR128:$src)>;
-def : Pat<(v16i8 (unpckh_undef VR128:$src, (undef))),
- (PUNPCKHBWrr VR128:$src, VR128:$src)>;
-def : Pat<(v8i16 (unpckh_undef VR128:$src, (undef))),
- (PUNPCKHWDrr VR128:$src, VR128:$src)>;
-def : Pat<(v4i32 (unpckh_undef VR128:$src, (undef))),
- (PUNPCKHDQrr VR128:$src, VR128:$src)>;
-}
-
-let AddedComplexity = 20 in {
-// vector_shuffle v1, v2 <0, 1, 4, 5> using MOVLHPS
-def : Pat<(v4i32 (movlhps VR128:$src1, VR128:$src2)),
- (MOVLHPSrr VR128:$src1, VR128:$src2)>;
-
-// vector_shuffle v1, v2 <6, 7, 2, 3> using MOVHLPS
-def : Pat<(v4i32 (movhlps VR128:$src1, VR128:$src2)),
- (MOVHLPSrr VR128:$src1, VR128:$src2)>;
-
-// vector_shuffle v1, undef <2, ?, ?, ?> using MOVHLPS
-def : Pat<(v4f32 (movhlps_undef VR128:$src1, (undef))),
- (MOVHLPSrr VR128:$src1, VR128:$src1)>;
-def : Pat<(v4i32 (movhlps_undef VR128:$src1, (undef))),
- (MOVHLPSrr VR128:$src1, VR128:$src1)>;
-}
-
-let AddedComplexity = 20 in {
-// vector_shuffle v1, (load v2) <4, 5, 2, 3> using MOVLPS
-def : Pat<(v4f32 (movlp VR128:$src1, (load addr:$src2))),
- (MOVLPSrm VR128:$src1, addr:$src2)>;
-def : Pat<(v2f64 (movlp VR128:$src1, (load addr:$src2))),
- (MOVLPDrm VR128:$src1, addr:$src2)>;
-def : Pat<(v4i32 (movlp VR128:$src1, (load addr:$src2))),
- (MOVLPSrm VR128:$src1, addr:$src2)>;
-def : Pat<(v2i64 (movlp VR128:$src1, (load addr:$src2))),
- (MOVLPDrm VR128:$src1, addr:$src2)>;
-}
-
-// (store (vector_shuffle (load addr), v2, <4, 5, 2, 3>), addr) using MOVLPS
-def : Pat<(store (v4f32 (movlp (load addr:$src1), VR128:$src2)), addr:$src1),
- (MOVLPSmr addr:$src1, VR128:$src2)>;
-def : Pat<(store (v2f64 (movlp (load addr:$src1), VR128:$src2)), addr:$src1),
- (MOVLPDmr addr:$src1, VR128:$src2)>;
-def : Pat<(store (v4i32 (movlp (bc_v4i32 (loadv2i64 addr:$src1)), VR128:$src2)),
- addr:$src1),
- (MOVLPSmr addr:$src1, VR128:$src2)>;
-def : Pat<(store (v2i64 (movlp (load addr:$src1), VR128:$src2)), addr:$src1),
- (MOVLPDmr addr:$src1, VR128:$src2)>;
-
-let AddedComplexity = 15 in {
-// Setting the lowest element in the vector.
-def : Pat<(v4i32 (movl VR128:$src1, VR128:$src2)),
- (MOVSSrr (v4i32 VR128:$src1),
- (EXTRACT_SUBREG (v4i32 VR128:$src2), sub_ss))>;
-def : Pat<(v2i64 (movl VR128:$src1, VR128:$src2)),
- (MOVSDrr (v2i64 VR128:$src1),
- (EXTRACT_SUBREG (v2i64 VR128:$src2), sub_sd))>;
-
-// vector_shuffle v1, v2 <4, 5, 2, 3> using movsd
-def : Pat<(v4f32 (movlp VR128:$src1, VR128:$src2)),
- (MOVSDrr VR128:$src1, (EXTRACT_SUBREG VR128:$src2, sub_sd))>,
- Requires<[HasSSE2]>;
-def : Pat<(v4i32 (movlp VR128:$src1, VR128:$src2)),
- (MOVSDrr VR128:$src1, (EXTRACT_SUBREG VR128:$src2, sub_sd))>,
- Requires<[HasSSE2]>;
-}
-
-// vector_shuffle v1, v2 <4, 5, 2, 3> using SHUFPSrri (we prefer movsd, but
-// fall back to this for SSE1)
-def : Pat<(v4f32 (movlp:$src3 VR128:$src1, (v4f32 VR128:$src2))),
- (SHUFPSrri VR128:$src2, VR128:$src1,
- (SHUFFLE_get_shuf_imm VR128:$src3))>;
-
-// Set lowest element and zero upper elements.
-def : Pat<(v2f64 (X86vzmovl (v2f64 VR128:$src))),
- (MOVZPQILo2PQIrr VR128:$src)>, Requires<[HasSSE2]>;
-
-// vector -> vector casts
-def : Pat<(v4f32 (sint_to_fp (v4i32 VR128:$src))),
- (Int_CVTDQ2PSrr VR128:$src)>, Requires<[HasSSE2]>;
-def : Pat<(v4i32 (fp_to_sint (v4f32 VR128:$src))),
- (CVTTPS2DQrr VR128:$src)>, Requires<[HasSSE2]>;
-
-// Use movaps / movups for SSE integer load / store (one byte shorter).
-// The instructions selected below are then converted to MOVDQA/MOVDQU
-// during the SSE domain pass.
-let Predicates = [HasSSE1] in {
- def : Pat<(alignedloadv4i32 addr:$src),
- (MOVAPSrm addr:$src)>;
- def : Pat<(loadv4i32 addr:$src),
- (MOVUPSrm addr:$src)>;
- def : Pat<(alignedloadv2i64 addr:$src),
- (MOVAPSrm addr:$src)>;
- def : Pat<(loadv2i64 addr:$src),
- (MOVUPSrm addr:$src)>;
-
- def : Pat<(alignedstore (v2i64 VR128:$src), addr:$dst),
- (MOVAPSmr addr:$dst, VR128:$src)>;
- def : Pat<(alignedstore (v4i32 VR128:$src), addr:$dst),
- (MOVAPSmr addr:$dst, VR128:$src)>;
- def : Pat<(alignedstore (v8i16 VR128:$src), addr:$dst),
- (MOVAPSmr addr:$dst, VR128:$src)>;
- def : Pat<(alignedstore (v16i8 VR128:$src), addr:$dst),
- (MOVAPSmr addr:$dst, VR128:$src)>;
- def : Pat<(store (v2i64 VR128:$src), addr:$dst),
- (MOVUPSmr addr:$dst, VR128:$src)>;
- def : Pat<(store (v4i32 VR128:$src), addr:$dst),
- (MOVUPSmr addr:$dst, VR128:$src)>;
- def : Pat<(store (v8i16 VR128:$src), addr:$dst),
- (MOVUPSmr addr:$dst, VR128:$src)>;
- def : Pat<(store (v16i8 VR128:$src), addr:$dst),
- (MOVUPSmr addr:$dst, VR128:$src)>;
-}
-
-// Use vmovaps/vmovups for AVX integer load/store.
-let Predicates = [HasAVX] in {
- // 128-bit load/store
- def : Pat<(alignedloadv4i32 addr:$src),
- (VMOVAPSrm addr:$src)>;
- def : Pat<(loadv4i32 addr:$src),
- (VMOVUPSrm addr:$src)>;
- def : Pat<(alignedloadv2i64 addr:$src),
- (VMOVAPSrm addr:$src)>;
- def : Pat<(loadv2i64 addr:$src),
- (VMOVUPSrm addr:$src)>;
-
- def : Pat<(alignedstore (v2i64 VR128:$src), addr:$dst),
- (VMOVAPSmr addr:$dst, VR128:$src)>;
- def : Pat<(alignedstore (v4i32 VR128:$src), addr:$dst),
- (VMOVAPSmr addr:$dst, VR128:$src)>;
- def : Pat<(alignedstore (v8i16 VR128:$src), addr:$dst),
- (VMOVAPSmr addr:$dst, VR128:$src)>;
- def : Pat<(alignedstore (v16i8 VR128:$src), addr:$dst),
- (VMOVAPSmr addr:$dst, VR128:$src)>;
- def : Pat<(store (v2i64 VR128:$src), addr:$dst),
- (VMOVUPSmr addr:$dst, VR128:$src)>;
- def : Pat<(store (v4i32 VR128:$src), addr:$dst),
- (VMOVUPSmr addr:$dst, VR128:$src)>;
- def : Pat<(store (v8i16 VR128:$src), addr:$dst),
- (VMOVUPSmr addr:$dst, VR128:$src)>;
- def : Pat<(store (v16i8 VR128:$src), addr:$dst),
- (VMOVUPSmr addr:$dst, VR128:$src)>;
-
- // 256-bit load/store
- def : Pat<(alignedloadv4i64 addr:$src),
- (VMOVAPSYrm addr:$src)>;
- def : Pat<(loadv4i64 addr:$src),
- (VMOVUPSYrm addr:$src)>;
- def : Pat<(alignedloadv8i32 addr:$src),
- (VMOVAPSYrm addr:$src)>;
- def : Pat<(loadv8i32 addr:$src),
- (VMOVUPSYrm addr:$src)>;
- def : Pat<(alignedstore (v4i64 VR256:$src), addr:$dst),
- (VMOVAPSYmr addr:$dst, VR256:$src)>;
- def : Pat<(alignedstore (v8i32 VR256:$src), addr:$dst),
- (VMOVAPSYmr addr:$dst, VR256:$src)>;
- def : Pat<(store (v4i64 VR256:$src), addr:$dst),
- (VMOVUPSYmr addr:$dst, VR256:$src)>;
- def : Pat<(store (v8i32 VR256:$src), addr:$dst),
- (VMOVUPSYmr addr:$dst, VR256:$src)>;
-}
-
//===----------------------------------------------------------------------===//
// SSE4.1 - Packed Move with Sign/Zero Extend
//===----------------------------------------------------------------------===//
@@ -3979,36 +5032,71 @@ defm PMOVZXBW : SS41I_binop_rm_int8<0x30, "pmovzxbw", int_x86_sse41_pmovzxbw>;
defm PMOVZXWD : SS41I_binop_rm_int8<0x33, "pmovzxwd", int_x86_sse41_pmovzxwd>;
defm PMOVZXDQ : SS41I_binop_rm_int8<0x35, "pmovzxdq", int_x86_sse41_pmovzxdq>;
-// Common patterns involving scalar load.
-def : Pat<(int_x86_sse41_pmovsxbw (vzmovl_v2i64 addr:$src)),
- (PMOVSXBWrm addr:$src)>, Requires<[HasSSE41]>;
-def : Pat<(int_x86_sse41_pmovsxbw (vzload_v2i64 addr:$src)),
- (PMOVSXBWrm addr:$src)>, Requires<[HasSSE41]>;
+let Predicates = [HasSSE41] in {
+ // Common patterns involving scalar load.
+ def : Pat<(int_x86_sse41_pmovsxbw (vzmovl_v2i64 addr:$src)),
+ (PMOVSXBWrm addr:$src)>;
+ def : Pat<(int_x86_sse41_pmovsxbw (vzload_v2i64 addr:$src)),
+ (PMOVSXBWrm addr:$src)>;
+
+ def : Pat<(int_x86_sse41_pmovsxwd (vzmovl_v2i64 addr:$src)),
+ (PMOVSXWDrm addr:$src)>;
+ def : Pat<(int_x86_sse41_pmovsxwd (vzload_v2i64 addr:$src)),
+ (PMOVSXWDrm addr:$src)>;
+
+ def : Pat<(int_x86_sse41_pmovsxdq (vzmovl_v2i64 addr:$src)),
+ (PMOVSXDQrm addr:$src)>;
+ def : Pat<(int_x86_sse41_pmovsxdq (vzload_v2i64 addr:$src)),
+ (PMOVSXDQrm addr:$src)>;
-def : Pat<(int_x86_sse41_pmovsxwd (vzmovl_v2i64 addr:$src)),
- (PMOVSXWDrm addr:$src)>, Requires<[HasSSE41]>;
-def : Pat<(int_x86_sse41_pmovsxwd (vzload_v2i64 addr:$src)),
- (PMOVSXWDrm addr:$src)>, Requires<[HasSSE41]>;
+ def : Pat<(int_x86_sse41_pmovzxbw (vzmovl_v2i64 addr:$src)),
+ (PMOVZXBWrm addr:$src)>;
+ def : Pat<(int_x86_sse41_pmovzxbw (vzload_v2i64 addr:$src)),
+ (PMOVZXBWrm addr:$src)>;
-def : Pat<(int_x86_sse41_pmovsxdq (vzmovl_v2i64 addr:$src)),
- (PMOVSXDQrm addr:$src)>, Requires<[HasSSE41]>;
-def : Pat<(int_x86_sse41_pmovsxdq (vzload_v2i64 addr:$src)),
- (PMOVSXDQrm addr:$src)>, Requires<[HasSSE41]>;
+ def : Pat<(int_x86_sse41_pmovzxwd (vzmovl_v2i64 addr:$src)),
+ (PMOVZXWDrm addr:$src)>;
+ def : Pat<(int_x86_sse41_pmovzxwd (vzload_v2i64 addr:$src)),
+ (PMOVZXWDrm addr:$src)>;
-def : Pat<(int_x86_sse41_pmovzxbw (vzmovl_v2i64 addr:$src)),
- (PMOVZXBWrm addr:$src)>, Requires<[HasSSE41]>;
-def : Pat<(int_x86_sse41_pmovzxbw (vzload_v2i64 addr:$src)),
- (PMOVZXBWrm addr:$src)>, Requires<[HasSSE41]>;
+ def : Pat<(int_x86_sse41_pmovzxdq (vzmovl_v2i64 addr:$src)),
+ (PMOVZXDQrm addr:$src)>;
+ def : Pat<(int_x86_sse41_pmovzxdq (vzload_v2i64 addr:$src)),
+ (PMOVZXDQrm addr:$src)>;
+}
+
+let Predicates = [HasAVX] in {
+ // Common patterns involving scalar load.
+ def : Pat<(int_x86_sse41_pmovsxbw (vzmovl_v2i64 addr:$src)),
+ (VPMOVSXBWrm addr:$src)>;
+ def : Pat<(int_x86_sse41_pmovsxbw (vzload_v2i64 addr:$src)),
+ (VPMOVSXBWrm addr:$src)>;
+
+ def : Pat<(int_x86_sse41_pmovsxwd (vzmovl_v2i64 addr:$src)),
+ (VPMOVSXWDrm addr:$src)>;
+ def : Pat<(int_x86_sse41_pmovsxwd (vzload_v2i64 addr:$src)),
+ (VPMOVSXWDrm addr:$src)>;
-def : Pat<(int_x86_sse41_pmovzxwd (vzmovl_v2i64 addr:$src)),
- (PMOVZXWDrm addr:$src)>, Requires<[HasSSE41]>;
-def : Pat<(int_x86_sse41_pmovzxwd (vzload_v2i64 addr:$src)),
- (PMOVZXWDrm addr:$src)>, Requires<[HasSSE41]>;
+ def : Pat<(int_x86_sse41_pmovsxdq (vzmovl_v2i64 addr:$src)),
+ (VPMOVSXDQrm addr:$src)>;
+ def : Pat<(int_x86_sse41_pmovsxdq (vzload_v2i64 addr:$src)),
+ (VPMOVSXDQrm addr:$src)>;
-def : Pat<(int_x86_sse41_pmovzxdq (vzmovl_v2i64 addr:$src)),
- (PMOVZXDQrm addr:$src)>, Requires<[HasSSE41]>;
-def : Pat<(int_x86_sse41_pmovzxdq (vzload_v2i64 addr:$src)),
- (PMOVZXDQrm addr:$src)>, Requires<[HasSSE41]>;
+ def : Pat<(int_x86_sse41_pmovzxbw (vzmovl_v2i64 addr:$src)),
+ (VPMOVZXBWrm addr:$src)>;
+ def : Pat<(int_x86_sse41_pmovzxbw (vzload_v2i64 addr:$src)),
+ (VPMOVZXBWrm addr:$src)>;
+
+ def : Pat<(int_x86_sse41_pmovzxwd (vzmovl_v2i64 addr:$src)),
+ (VPMOVZXWDrm addr:$src)>;
+ def : Pat<(int_x86_sse41_pmovzxwd (vzload_v2i64 addr:$src)),
+ (VPMOVZXWDrm addr:$src)>;
+
+ def : Pat<(int_x86_sse41_pmovzxdq (vzmovl_v2i64 addr:$src)),
+ (VPMOVZXDQrm addr:$src)>;
+ def : Pat<(int_x86_sse41_pmovzxdq (vzload_v2i64 addr:$src)),
+ (VPMOVZXDQrm addr:$src)>;
+}
multiclass SS41I_binop_rm_int4<bits<8> opc, string OpcodeStr, Intrinsic IntId> {
@@ -4039,17 +5127,31 @@ defm PMOVSXWQ : SS41I_binop_rm_int4<0x24, "pmovsxwq", int_x86_sse41_pmovsxwq>;
defm PMOVZXBD : SS41I_binop_rm_int4<0x31, "pmovzxbd", int_x86_sse41_pmovzxbd>;
defm PMOVZXWQ : SS41I_binop_rm_int4<0x34, "pmovzxwq", int_x86_sse41_pmovzxwq>;
-// Common patterns involving scalar load
-def : Pat<(int_x86_sse41_pmovsxbd (vzmovl_v4i32 addr:$src)),
- (PMOVSXBDrm addr:$src)>, Requires<[HasSSE41]>;
-def : Pat<(int_x86_sse41_pmovsxwq (vzmovl_v4i32 addr:$src)),
- (PMOVSXWQrm addr:$src)>, Requires<[HasSSE41]>;
+let Predicates = [HasSSE41] in {
+ // Common patterns involving scalar load
+ def : Pat<(int_x86_sse41_pmovsxbd (vzmovl_v4i32 addr:$src)),
+ (PMOVSXBDrm addr:$src)>;
+ def : Pat<(int_x86_sse41_pmovsxwq (vzmovl_v4i32 addr:$src)),
+ (PMOVSXWQrm addr:$src)>;
+
+ def : Pat<(int_x86_sse41_pmovzxbd (vzmovl_v4i32 addr:$src)),
+ (PMOVZXBDrm addr:$src)>;
+ def : Pat<(int_x86_sse41_pmovzxwq (vzmovl_v4i32 addr:$src)),
+ (PMOVZXWQrm addr:$src)>;
+}
-def : Pat<(int_x86_sse41_pmovzxbd (vzmovl_v4i32 addr:$src)),
- (PMOVZXBDrm addr:$src)>, Requires<[HasSSE41]>;
-def : Pat<(int_x86_sse41_pmovzxwq (vzmovl_v4i32 addr:$src)),
- (PMOVZXWQrm addr:$src)>, Requires<[HasSSE41]>;
+let Predicates = [HasAVX] in {
+ // Common patterns involving scalar load
+ def : Pat<(int_x86_sse41_pmovsxbd (vzmovl_v4i32 addr:$src)),
+ (VPMOVSXBDrm addr:$src)>;
+ def : Pat<(int_x86_sse41_pmovsxwq (vzmovl_v4i32 addr:$src)),
+ (VPMOVSXWQrm addr:$src)>;
+ def : Pat<(int_x86_sse41_pmovzxbd (vzmovl_v4i32 addr:$src)),
+ (VPMOVZXBDrm addr:$src)>;
+ def : Pat<(int_x86_sse41_pmovzxwq (vzmovl_v4i32 addr:$src)),
+ (VPMOVZXWQrm addr:$src)>;
+}
multiclass SS41I_binop_rm_int2<bits<8> opc, string OpcodeStr, Intrinsic IntId> {
def rr : SS48I<opc, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
@@ -4073,16 +5175,31 @@ defm VPMOVZXBQ : SS41I_binop_rm_int2<0x32, "vpmovzxbq", int_x86_sse41_pmovzxbq>,
defm PMOVSXBQ : SS41I_binop_rm_int2<0x22, "pmovsxbq", int_x86_sse41_pmovsxbq>;
defm PMOVZXBQ : SS41I_binop_rm_int2<0x32, "pmovzxbq", int_x86_sse41_pmovzxbq>;
-// Common patterns involving scalar load
-def : Pat<(int_x86_sse41_pmovsxbq
- (bitconvert (v4i32 (X86vzmovl
- (v4i32 (scalar_to_vector (loadi32 addr:$src))))))),
- (PMOVSXBQrm addr:$src)>, Requires<[HasSSE41]>;
+let Predicates = [HasSSE41] in {
+ // Common patterns involving scalar load
+ def : Pat<(int_x86_sse41_pmovsxbq
+ (bitconvert (v4i32 (X86vzmovl
+ (v4i32 (scalar_to_vector (loadi32 addr:$src))))))),
+ (PMOVSXBQrm addr:$src)>;
+
+ def : Pat<(int_x86_sse41_pmovzxbq
+ (bitconvert (v4i32 (X86vzmovl
+ (v4i32 (scalar_to_vector (loadi32 addr:$src))))))),
+ (PMOVZXBQrm addr:$src)>;
+}
+
+let Predicates = [HasAVX] in {
+ // Common patterns involving scalar load
+ def : Pat<(int_x86_sse41_pmovsxbq
+ (bitconvert (v4i32 (X86vzmovl
+ (v4i32 (scalar_to_vector (loadi32 addr:$src))))))),
+ (VPMOVSXBQrm addr:$src)>;
-def : Pat<(int_x86_sse41_pmovzxbq
- (bitconvert (v4i32 (X86vzmovl
- (v4i32 (scalar_to_vector (loadi32 addr:$src))))))),
- (PMOVZXBQrm addr:$src)>, Requires<[HasSSE41]>;
+ def : Pat<(int_x86_sse41_pmovzxbq
+ (bitconvert (v4i32 (X86vzmovl
+ (v4i32 (scalar_to_vector (loadi32 addr:$src))))))),
+ (VPMOVZXBQrm addr:$src)>;
+}
//===----------------------------------------------------------------------===//
// SSE4.1 - Extract Instructions
@@ -4208,7 +5325,12 @@ def : Pat<(store (f32 (bitconvert (extractelt (bc_v4i32 (v4f32 VR128:$src1)),
imm:$src2))),
addr:$dst),
(EXTRACTPSmr addr:$dst, VR128:$src1, imm:$src2)>,
- Requires<[HasSSE41]>;
+ Requires<[HasSSE41]>;
+def : Pat<(store (f32 (bitconvert (extractelt (bc_v4i32 (v4f32 VR128:$src1)),
+ imm:$src2))),
+ addr:$dst),
+ (VEXTRACTPSmr addr:$dst, VR128:$src1, imm:$src2)>,
+ Requires<[HasAVX]>;
//===----------------------------------------------------------------------===//
// SSE4.1 - Insert Instructions
@@ -4297,7 +5419,7 @@ let Constraints = "$src1 = $dst" in
// in the target vector.
multiclass SS41I_insertf32<bits<8> opc, string asm, bit Is2Addr = 1> {
def rr : SS4AIi8<opc, MRMSrcReg, (outs VR128:$dst),
- (ins VR128:$src1, VR128:$src2, i32i8imm:$src3),
+ (ins VR128:$src1, VR128:$src2, u32u8imm:$src3),
!if(Is2Addr,
!strconcat(asm, "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
!strconcat(asm,
@@ -4306,7 +5428,7 @@ multiclass SS41I_insertf32<bits<8> opc, string asm, bit Is2Addr = 1> {
(X86insrtps VR128:$src1, VR128:$src2, imm:$src3))]>,
OpSize;
def rm : SS4AIi8<opc, MRMSrcMem, (outs VR128:$dst),
- (ins VR128:$src1, f32mem:$src2, i32i8imm:$src3),
+ (ins VR128:$src1, f32mem:$src2, u32u8imm:$src3),
!if(Is2Addr,
!strconcat(asm, "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
!strconcat(asm,
@@ -4348,7 +5470,7 @@ multiclass sse41_fp_unop_rm<bits<8> opcps, bits<8> opcpd, string OpcodeStr,
// Vector intrinsic operation, mem
def PSm : Ii8<opcps, MRMSrcMem,
- (outs RC:$dst), (ins f256mem:$src1, i32i8imm:$src2),
+ (outs RC:$dst), (ins x86memop:$src1, i32i8imm:$src2),
!strconcat(OpcodeStr,
"ps\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
[(set RC:$dst,
@@ -4366,7 +5488,7 @@ multiclass sse41_fp_unop_rm<bits<8> opcps, bits<8> opcpd, string OpcodeStr,
// Vector intrinsic operation, mem
def PDm : SS4AIi8<opcpd, MRMSrcMem,
- (outs RC:$dst), (ins f256mem:$src1, i32i8imm:$src2),
+ (outs RC:$dst), (ins x86memop:$src1, i32i8imm:$src2),
!strconcat(OpcodeStr,
"pd\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
[(set RC:$dst,
@@ -4501,14 +5623,14 @@ let Predicates = [HasAVX] in {
int_x86_avx_round_pd_256>, VEX;
defm VROUND : sse41_fp_binop_rm<0x0A, 0x0B, "vround",
int_x86_sse41_round_ss,
- int_x86_sse41_round_sd, 0>, VEX_4V;
+ int_x86_sse41_round_sd, 0>, VEX_4V, VEX_LIG;
// Instructions for the assembler
defm VROUND : sse41_fp_unop_rm_avx_p<0x08, 0x09, VR128, f128mem, "vround">,
VEX;
defm VROUNDY : sse41_fp_unop_rm_avx_p<0x08, 0x09, VR256, f256mem, "vround">,
VEX;
- defm VROUND : sse41_fp_binop_rm_avx_s<0x0A, 0x0B, "vround">, VEX_4V;
+ defm VROUND : sse41_fp_binop_rm_avx_s<0x0A, 0x0B, "vround">, VEX_4V, VEX_LIG;
}
defm ROUND : sse41_fp_unop_rm<0x08, 0x09, "round", f128mem, VR128,
@@ -4578,26 +5700,34 @@ defm VTESTPDY : avx_bittest<0x0F, "vtestpd", VR256, f256mem, memopv4f64, v4f64>;
// SSE4.1 - Misc Instructions
//===----------------------------------------------------------------------===//
-def POPCNT16rr : I<0xB8, MRMSrcReg, (outs GR16:$dst), (ins GR16:$src),
- "popcnt{w}\t{$src, $dst|$dst, $src}",
- [(set GR16:$dst, (ctpop GR16:$src))]>, OpSize, XS;
-def POPCNT16rm : I<0xB8, MRMSrcMem, (outs GR16:$dst), (ins i16mem:$src),
- "popcnt{w}\t{$src, $dst|$dst, $src}",
- [(set GR16:$dst, (ctpop (loadi16 addr:$src)))]>, OpSize, XS;
-
-def POPCNT32rr : I<0xB8, MRMSrcReg, (outs GR32:$dst), (ins GR32:$src),
- "popcnt{l}\t{$src, $dst|$dst, $src}",
- [(set GR32:$dst, (ctpop GR32:$src))]>, XS;
-def POPCNT32rm : I<0xB8, MRMSrcMem, (outs GR32:$dst), (ins i32mem:$src),
- "popcnt{l}\t{$src, $dst|$dst, $src}",
- [(set GR32:$dst, (ctpop (loadi32 addr:$src)))]>, XS;
-
-def POPCNT64rr : RI<0xB8, MRMSrcReg, (outs GR64:$dst), (ins GR64:$src),
- "popcnt{q}\t{$src, $dst|$dst, $src}",
- [(set GR64:$dst, (ctpop GR64:$src))]>, XS;
-def POPCNT64rm : RI<0xB8, MRMSrcMem, (outs GR64:$dst), (ins i64mem:$src),
- "popcnt{q}\t{$src, $dst|$dst, $src}",
- [(set GR64:$dst, (ctpop (loadi64 addr:$src)))]>, XS;
+let Defs = [EFLAGS], Predicates = [HasPOPCNT] in {
+ def POPCNT16rr : I<0xB8, MRMSrcReg, (outs GR16:$dst), (ins GR16:$src),
+ "popcnt{w}\t{$src, $dst|$dst, $src}",
+ [(set GR16:$dst, (ctpop GR16:$src)), (implicit EFLAGS)]>,
+ OpSize, XS;
+ def POPCNT16rm : I<0xB8, MRMSrcMem, (outs GR16:$dst), (ins i16mem:$src),
+ "popcnt{w}\t{$src, $dst|$dst, $src}",
+ [(set GR16:$dst, (ctpop (loadi16 addr:$src))),
+ (implicit EFLAGS)]>, OpSize, XS;
+
+ def POPCNT32rr : I<0xB8, MRMSrcReg, (outs GR32:$dst), (ins GR32:$src),
+ "popcnt{l}\t{$src, $dst|$dst, $src}",
+ [(set GR32:$dst, (ctpop GR32:$src)), (implicit EFLAGS)]>,
+ XS;
+ def POPCNT32rm : I<0xB8, MRMSrcMem, (outs GR32:$dst), (ins i32mem:$src),
+ "popcnt{l}\t{$src, $dst|$dst, $src}",
+ [(set GR32:$dst, (ctpop (loadi32 addr:$src))),
+ (implicit EFLAGS)]>, XS;
+
+ def POPCNT64rr : RI<0xB8, MRMSrcReg, (outs GR64:$dst), (ins GR64:$src),
+ "popcnt{q}\t{$src, $dst|$dst, $src}",
+ [(set GR64:$dst, (ctpop GR64:$src)), (implicit EFLAGS)]>,
+ XS;
+ def POPCNT64rm : RI<0xB8, MRMSrcMem, (outs GR64:$dst), (ins i64mem:$src),
+ "popcnt{q}\t{$src, $dst|$dst, $src}",
+ [(set GR64:$dst, (ctpop (loadi64 addr:$src))),
+ (implicit EFLAGS)]>, XS;
+}
@@ -4666,6 +5796,11 @@ let Predicates = [HasAVX] in {
0>, VEX_4V;
defm VPMULDQ : SS41I_binop_rm_int<0x28, "vpmuldq", int_x86_sse41_pmuldq,
0>, VEX_4V;
+
+ def : Pat<(v2i64 (X86pcmpeqq VR128:$src1, VR128:$src2)),
+ (VPCMPEQQrr VR128:$src1, VR128:$src2)>;
+ def : Pat<(v2i64 (X86pcmpeqq VR128:$src1, (memop addr:$src2))),
+ (VPCMPEQQrm VR128:$src1, addr:$src2)>;
}
let Constraints = "$src1 = $dst" in {
@@ -4720,7 +5855,7 @@ multiclass SS41I_binop_rmi_int<bits<8> opc, string OpcodeStr,
X86MemOperand x86memop, bit Is2Addr = 1> {
let isCommutable = 1 in
def rri : SS4AIi8<opc, MRMSrcReg, (outs RC:$dst),
- (ins RC:$src1, RC:$src2, i32i8imm:$src3),
+ (ins RC:$src1, RC:$src2, u32u8imm:$src3),
!if(Is2Addr,
!strconcat(OpcodeStr,
"\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
@@ -4729,7 +5864,7 @@ multiclass SS41I_binop_rmi_int<bits<8> opc, string OpcodeStr,
[(set RC:$dst, (IntId RC:$src1, RC:$src2, imm:$src3))]>,
OpSize;
def rmi : SS4AIi8<opc, MRMSrcMem, (outs RC:$dst),
- (ins RC:$src1, x86memop:$src2, i32i8imm:$src3),
+ (ins RC:$src1, x86memop:$src2, u32u8imm:$src3),
!if(Is2Addr,
!strconcat(OpcodeStr,
"\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
@@ -4815,6 +5950,36 @@ defm VBLENDVPDY : SS41I_quaternary_int_avx<0x4B, "vblendvpd", VR256, i256mem,
defm VBLENDVPSY : SS41I_quaternary_int_avx<0x4A, "vblendvps", VR256, i256mem,
memopv32i8, int_x86_avx_blendv_ps_256>;
+let Predicates = [HasAVX] in {
+ def : Pat<(v16i8 (vselect (v16i8 VR128:$mask), (v16i8 VR128:$src1),
+ (v16i8 VR128:$src2))),
+ (VPBLENDVBrr VR128:$src2, VR128:$src1, VR128:$mask)>;
+ def : Pat<(v4i32 (vselect (v4i32 VR128:$mask), (v4i32 VR128:$src1),
+ (v4i32 VR128:$src2))),
+ (VBLENDVPSrr VR128:$src2, VR128:$src1, VR128:$mask)>;
+ def : Pat<(v4f32 (vselect (v4i32 VR128:$mask), (v4f32 VR128:$src1),
+ (v4f32 VR128:$src2))),
+ (VBLENDVPSrr VR128:$src2, VR128:$src1, VR128:$mask)>;
+ def : Pat<(v2i64 (vselect (v2i64 VR128:$mask), (v2i64 VR128:$src1),
+ (v2i64 VR128:$src2))),
+ (VBLENDVPDrr VR128:$src2, VR128:$src1, VR128:$mask)>;
+ def : Pat<(v2f64 (vselect (v2i64 VR128:$mask), (v2f64 VR128:$src1),
+ (v2f64 VR128:$src2))),
+ (VBLENDVPDrr VR128:$src2, VR128:$src1, VR128:$mask)>;
+ def : Pat<(v8i32 (vselect (v8i32 VR256:$mask), (v8i32 VR256:$src1),
+ (v8i32 VR256:$src2))),
+ (VBLENDVPSYrr VR256:$src2, VR256:$src1, VR256:$mask)>;
+ def : Pat<(v8f32 (vselect (v8i32 VR256:$mask), (v8f32 VR256:$src1),
+ (v8f32 VR256:$src2))),
+ (VBLENDVPSYrr VR256:$src2, VR256:$src1, VR256:$mask)>;
+ def : Pat<(v4i64 (vselect (v4i64 VR256:$mask), (v4i64 VR256:$src1),
+ (v4i64 VR256:$src2))),
+ (VBLENDVPDYrr VR256:$src2, VR256:$src1, VR256:$mask)>;
+ def : Pat<(v4f64 (vselect (v4i64 VR256:$mask), (v4f64 VR256:$src1),
+ (v4f64 VR256:$src2))),
+ (VBLENDVPDYrr VR256:$src2, VR256:$src1, VR256:$mask)>;
+}
+
/// SS41I_ternary_int - SSE 4.1 ternary operator
let Uses = [XMM0], Constraints = "$src1 = $dst" in {
multiclass SS41I_ternary_int<bits<8> opc, string OpcodeStr, Intrinsic IntId> {
@@ -4835,12 +6000,27 @@ let Uses = [XMM0], Constraints = "$src1 = $dst" in {
}
}
-defm BLENDVPD : SS41I_ternary_int<0x15, "blendvpd", int_x86_sse41_blendvpd>;
-defm BLENDVPS : SS41I_ternary_int<0x14, "blendvps", int_x86_sse41_blendvps>;
-defm PBLENDVB : SS41I_ternary_int<0x10, "pblendvb", int_x86_sse41_pblendvb>;
-
-def : Pat<(X86pblendv VR128:$src1, VR128:$src2, XMM0),
- (PBLENDVBrr0 VR128:$src1, VR128:$src2)>;
+defm BLENDVPD : SS41I_ternary_int<0x15, "blendvpd", int_x86_sse41_blendvpd>;
+defm BLENDVPS : SS41I_ternary_int<0x14, "blendvps", int_x86_sse41_blendvps>;
+defm PBLENDVB : SS41I_ternary_int<0x10, "pblendvb", int_x86_sse41_pblendvb>;
+
+let Predicates = [HasSSE41] in {
+ def : Pat<(v16i8 (vselect (v16i8 XMM0), (v16i8 VR128:$src1),
+ (v16i8 VR128:$src2))),
+ (PBLENDVBrr0 VR128:$src2, VR128:$src1)>;
+ def : Pat<(v4i32 (vselect (v4i32 XMM0), (v4i32 VR128:$src1),
+ (v4i32 VR128:$src2))),
+ (BLENDVPSrr0 VR128:$src2, VR128:$src1)>;
+ def : Pat<(v4f32 (vselect (v4i32 XMM0), (v4f32 VR128:$src1),
+ (v4f32 VR128:$src2))),
+ (BLENDVPSrr0 VR128:$src2, VR128:$src1)>;
+ def : Pat<(v2i64 (vselect (v2i64 XMM0), (v2i64 VR128:$src1),
+ (v2i64 VR128:$src2))),
+ (BLENDVPDrr0 VR128:$src2, VR128:$src1)>;
+ def : Pat<(v2f64 (vselect (v2i64 XMM0), (v2f64 VR128:$src1),
+ (v2f64 VR128:$src2))),
+ (BLENDVPDrr0 VR128:$src2, VR128:$src1)>;
+}
let Predicates = [HasAVX] in
def VMOVNTDQArm : SS48I<0x2A, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src),
@@ -4876,9 +6056,16 @@ multiclass SS42I_binop_rm_int<bits<8> opc, string OpcodeStr,
(bitconvert (memopv16i8 addr:$src2))))]>, OpSize;
}
-let Predicates = [HasAVX] in
+let Predicates = [HasAVX] in {
defm VPCMPGTQ : SS42I_binop_rm_int<0x37, "vpcmpgtq", int_x86_sse42_pcmpgtq,
0>, VEX_4V;
+
+ def : Pat<(v2i64 (X86pcmpgtq VR128:$src1, VR128:$src2)),
+ (VPCMPGTQrr VR128:$src1, VR128:$src2)>;
+ def : Pat<(v2i64 (X86pcmpgtq VR128:$src1, (memop addr:$src2))),
+ (VPCMPGTQrm VR128:$src1, addr:$src2)>;
+}
+
let Constraints = "$src1 = $dst" in
defm PCMPGTQ : SS42I_binop_rm_int<0x37, "pcmpgtq", int_x86_sse42_pcmpgtq>;
@@ -5158,22 +6345,43 @@ let Constraints = "$src1 = $dst" in {
int_x86_aesni_aesdeclast>;
}
-def : Pat<(v2i64 (int_x86_aesni_aesenc VR128:$src1, VR128:$src2)),
- (AESENCrr VR128:$src1, VR128:$src2)>;
-def : Pat<(v2i64 (int_x86_aesni_aesenc VR128:$src1, (memop addr:$src2))),
- (AESENCrm VR128:$src1, addr:$src2)>;
-def : Pat<(v2i64 (int_x86_aesni_aesenclast VR128:$src1, VR128:$src2)),
- (AESENCLASTrr VR128:$src1, VR128:$src2)>;
-def : Pat<(v2i64 (int_x86_aesni_aesenclast VR128:$src1, (memop addr:$src2))),
- (AESENCLASTrm VR128:$src1, addr:$src2)>;
-def : Pat<(v2i64 (int_x86_aesni_aesdec VR128:$src1, VR128:$src2)),
- (AESDECrr VR128:$src1, VR128:$src2)>;
-def : Pat<(v2i64 (int_x86_aesni_aesdec VR128:$src1, (memop addr:$src2))),
- (AESDECrm VR128:$src1, addr:$src2)>;
-def : Pat<(v2i64 (int_x86_aesni_aesdeclast VR128:$src1, VR128:$src2)),
- (AESDECLASTrr VR128:$src1, VR128:$src2)>;
-def : Pat<(v2i64 (int_x86_aesni_aesdeclast VR128:$src1, (memop addr:$src2))),
- (AESDECLASTrm VR128:$src1, addr:$src2)>;
+let Predicates = [HasAES] in {
+ def : Pat<(v2i64 (int_x86_aesni_aesenc VR128:$src1, VR128:$src2)),
+ (AESENCrr VR128:$src1, VR128:$src2)>;
+ def : Pat<(v2i64 (int_x86_aesni_aesenc VR128:$src1, (memop addr:$src2))),
+ (AESENCrm VR128:$src1, addr:$src2)>;
+ def : Pat<(v2i64 (int_x86_aesni_aesenclast VR128:$src1, VR128:$src2)),
+ (AESENCLASTrr VR128:$src1, VR128:$src2)>;
+ def : Pat<(v2i64 (int_x86_aesni_aesenclast VR128:$src1, (memop addr:$src2))),
+ (AESENCLASTrm VR128:$src1, addr:$src2)>;
+ def : Pat<(v2i64 (int_x86_aesni_aesdec VR128:$src1, VR128:$src2)),
+ (AESDECrr VR128:$src1, VR128:$src2)>;
+ def : Pat<(v2i64 (int_x86_aesni_aesdec VR128:$src1, (memop addr:$src2))),
+ (AESDECrm VR128:$src1, addr:$src2)>;
+ def : Pat<(v2i64 (int_x86_aesni_aesdeclast VR128:$src1, VR128:$src2)),
+ (AESDECLASTrr VR128:$src1, VR128:$src2)>;
+ def : Pat<(v2i64 (int_x86_aesni_aesdeclast VR128:$src1, (memop addr:$src2))),
+ (AESDECLASTrm VR128:$src1, addr:$src2)>;
+}
+
+let Predicates = [HasAVX, HasAES], AddedComplexity = 20 in {
+ def : Pat<(v2i64 (int_x86_aesni_aesenc VR128:$src1, VR128:$src2)),
+ (VAESENCrr VR128:$src1, VR128:$src2)>;
+ def : Pat<(v2i64 (int_x86_aesni_aesenc VR128:$src1, (memop addr:$src2))),
+ (VAESENCrm VR128:$src1, addr:$src2)>;
+ def : Pat<(v2i64 (int_x86_aesni_aesenclast VR128:$src1, VR128:$src2)),
+ (VAESENCLASTrr VR128:$src1, VR128:$src2)>;
+ def : Pat<(v2i64 (int_x86_aesni_aesenclast VR128:$src1, (memop addr:$src2))),
+ (VAESENCLASTrm VR128:$src1, addr:$src2)>;
+ def : Pat<(v2i64 (int_x86_aesni_aesdec VR128:$src1, VR128:$src2)),
+ (VAESDECrr VR128:$src1, VR128:$src2)>;
+ def : Pat<(v2i64 (int_x86_aesni_aesdec VR128:$src1, (memop addr:$src2))),
+ (VAESDECrm VR128:$src1, addr:$src2)>;
+ def : Pat<(v2i64 (int_x86_aesni_aesdeclast VR128:$src1, VR128:$src2)),
+ (VAESDECLASTrr VR128:$src1, VR128:$src2)>;
+ def : Pat<(v2i64 (int_x86_aesni_aesdeclast VR128:$src1, (memop addr:$src2))),
+ (VAESDECLASTrm VR128:$src1, addr:$src2)>;
+}
// Perform the AES InvMixColumn Transformation
let Predicates = [HasAVX, HasAES] in {
@@ -5288,8 +6496,10 @@ defm : pclmul_alias<"lqlq", 0x00>;
// AVX Instructions
//===----------------------------------------------------------------------===//
-
-// Load from memory and broadcast to all elements of the destination operand
+//===----------------------------------------------------------------------===//
+// VBROADCAST - Load from memory and broadcast to all elements of the
+// destination operand
+//
class avx_broadcast<bits<8> opc, string OpcodeStr, RegisterClass RC,
X86MemOperand x86memop, Intrinsic Int> :
AVX8I<opc, MRMSrcMem, (outs RC:$dst), (ins x86memop:$src),
@@ -5305,7 +6515,26 @@ def VBROADCASTSD : avx_broadcast<0x19, "vbroadcastsd", VR256, f64mem,
def VBROADCASTF128 : avx_broadcast<0x1A, "vbroadcastf128", VR256, f128mem,
int_x86_avx_vbroadcastf128_pd_256>;
-// Insert packed floating-point values
+def : Pat<(int_x86_avx_vbroadcastf128_ps_256 addr:$src),
+ (VBROADCASTF128 addr:$src)>;
+
+def : Pat<(v8i32 (X86VBroadcast (loadi32 addr:$src))),
+ (VBROADCASTSSY addr:$src)>;
+def : Pat<(v4i64 (X86VBroadcast (loadi64 addr:$src))),
+ (VBROADCASTSD addr:$src)>;
+def : Pat<(v8f32 (X86VBroadcast (loadf32 addr:$src))),
+ (VBROADCASTSSY addr:$src)>;
+def : Pat<(v4f64 (X86VBroadcast (loadf64 addr:$src))),
+ (VBROADCASTSD addr:$src)>;
+
+def : Pat<(v4f32 (X86VBroadcast (loadf32 addr:$src))),
+ (VBROADCASTSS addr:$src)>;
+def : Pat<(v4i32 (X86VBroadcast (loadi32 addr:$src))),
+ (VBROADCASTSS addr:$src)>;
+
+//===----------------------------------------------------------------------===//
+// VINSERTF128 - Insert packed floating-point values
+//
def VINSERTF128rr : AVXAIi8<0x18, MRMSrcReg, (outs VR256:$dst),
(ins VR256:$src1, VR128:$src2, i8imm:$src3),
"vinsertf128\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
@@ -5315,7 +6544,41 @@ def VINSERTF128rm : AVXAIi8<0x18, MRMSrcMem, (outs VR256:$dst),
"vinsertf128\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
[]>, VEX_4V;
-// Extract packed floating-point values
+def : Pat<(int_x86_avx_vinsertf128_pd_256 VR256:$src1, VR128:$src2, imm:$src3),
+ (VINSERTF128rr VR256:$src1, VR128:$src2, imm:$src3)>;
+def : Pat<(int_x86_avx_vinsertf128_ps_256 VR256:$src1, VR128:$src2, imm:$src3),
+ (VINSERTF128rr VR256:$src1, VR128:$src2, imm:$src3)>;
+def : Pat<(int_x86_avx_vinsertf128_si_256 VR256:$src1, VR128:$src2, imm:$src3),
+ (VINSERTF128rr VR256:$src1, VR128:$src2, imm:$src3)>;
+
+def : Pat<(vinsertf128_insert:$ins (v8f32 VR256:$src1), (v4f32 VR128:$src2),
+ (i32 imm)),
+ (VINSERTF128rr VR256:$src1, VR128:$src2,
+ (INSERT_get_vinsertf128_imm VR256:$ins))>;
+def : Pat<(vinsertf128_insert:$ins (v4f64 VR256:$src1), (v2f64 VR128:$src2),
+ (i32 imm)),
+ (VINSERTF128rr VR256:$src1, VR128:$src2,
+ (INSERT_get_vinsertf128_imm VR256:$ins))>;
+def : Pat<(vinsertf128_insert:$ins (v8i32 VR256:$src1), (v4i32 VR128:$src2),
+ (i32 imm)),
+ (VINSERTF128rr VR256:$src1, VR128:$src2,
+ (INSERT_get_vinsertf128_imm VR256:$ins))>;
+def : Pat<(vinsertf128_insert:$ins (v4i64 VR256:$src1), (v2i64 VR128:$src2),
+ (i32 imm)),
+ (VINSERTF128rr VR256:$src1, VR128:$src2,
+ (INSERT_get_vinsertf128_imm VR256:$ins))>;
+def : Pat<(vinsertf128_insert:$ins (v32i8 VR256:$src1), (v16i8 VR128:$src2),
+ (i32 imm)),
+ (VINSERTF128rr VR256:$src1, VR128:$src2,
+ (INSERT_get_vinsertf128_imm VR256:$ins))>;
+def : Pat<(vinsertf128_insert:$ins (v16i16 VR256:$src1), (v8i16 VR128:$src2),
+ (i32 imm)),
+ (VINSERTF128rr VR256:$src1, VR128:$src2,
+ (INSERT_get_vinsertf128_imm VR256:$ins))>;
+
+//===----------------------------------------------------------------------===//
+// VEXTRACTF128 - Extract packed floating-point values
+//
def VEXTRACTF128rr : AVXAIi8<0x19, MRMDestReg, (outs VR128:$dst),
(ins VR256:$src1, i8imm:$src2),
"vextractf128\t{$src2, $src1, $dst|$dst, $src1, $src2}",
@@ -5325,7 +6588,41 @@ def VEXTRACTF128mr : AVXAIi8<0x19, MRMDestMem, (outs),
"vextractf128\t{$src2, $src1, $dst|$dst, $src1, $src2}",
[]>, VEX;
-// Conditional SIMD Packed Loads and Stores
+def : Pat<(int_x86_avx_vextractf128_pd_256 VR256:$src1, imm:$src2),
+ (VEXTRACTF128rr VR256:$src1, imm:$src2)>;
+def : Pat<(int_x86_avx_vextractf128_ps_256 VR256:$src1, imm:$src2),
+ (VEXTRACTF128rr VR256:$src1, imm:$src2)>;
+def : Pat<(int_x86_avx_vextractf128_si_256 VR256:$src1, imm:$src2),
+ (VEXTRACTF128rr VR256:$src1, imm:$src2)>;
+
+def : Pat<(vextractf128_extract:$ext VR256:$src1, (i32 imm)),
+ (v4f32 (VEXTRACTF128rr
+ (v8f32 VR256:$src1),
+ (EXTRACT_get_vextractf128_imm VR128:$ext)))>;
+def : Pat<(vextractf128_extract:$ext VR256:$src1, (i32 imm)),
+ (v2f64 (VEXTRACTF128rr
+ (v4f64 VR256:$src1),
+ (EXTRACT_get_vextractf128_imm VR128:$ext)))>;
+def : Pat<(vextractf128_extract:$ext VR256:$src1, (i32 imm)),
+ (v4i32 (VEXTRACTF128rr
+ (v8i32 VR256:$src1),
+ (EXTRACT_get_vextractf128_imm VR128:$ext)))>;
+def : Pat<(vextractf128_extract:$ext VR256:$src1, (i32 imm)),
+ (v2i64 (VEXTRACTF128rr
+ (v4i64 VR256:$src1),
+ (EXTRACT_get_vextractf128_imm VR128:$ext)))>;
+def : Pat<(vextractf128_extract:$ext VR256:$src1, (i32 imm)),
+ (v8i16 (VEXTRACTF128rr
+ (v16i16 VR256:$src1),
+ (EXTRACT_get_vextractf128_imm VR128:$ext)))>;
+def : Pat<(vextractf128_extract:$ext VR256:$src1, (i32 imm)),
+ (v16i8 (VEXTRACTF128rr
+ (v32i8 VR256:$src1),
+ (EXTRACT_get_vextractf128_imm VR128:$ext)))>;
+
+//===----------------------------------------------------------------------===//
+// VMASKMOV - Conditional SIMD Packed Loads and Stores
+//
multiclass avx_movmask_rm<bits<8> opc_rm, bits<8> opc_mr, string OpcodeStr,
Intrinsic IntLd, Intrinsic IntLd256,
Intrinsic IntSt, Intrinsic IntSt256,
@@ -5363,7 +6660,9 @@ defm VMASKMOVPD : avx_movmask_rm<0x2D, 0x2F, "vmaskmovpd",
int_x86_avx_maskstore_pd_256,
memopv2f64, memopv4f64>;
-// Permute Floating-Point Values
+//===----------------------------------------------------------------------===//
+// VPERMIL - Permute Single and Double Floating-Point Values
+//
multiclass avx_permil<bits<8> opc_rm, bits<8> opc_rmi, string OpcodeStr,
RegisterClass RC, X86MemOperand x86memop_f,
X86MemOperand x86memop_i, PatFrag f_frag, PatFrag i_frag,
@@ -5404,6 +6703,18 @@ defm VPERMILPDY : avx_permil<0x0D, 0x05, "vpermilpd", VR256, f256mem, i256mem,
int_x86_avx_vpermilvar_pd_256,
int_x86_avx_vpermil_pd_256>;
+def : Pat<(v8f32 (X86VPermilpsy VR256:$src1, (i8 imm:$imm))),
+ (VPERMILPSYri VR256:$src1, imm:$imm)>;
+def : Pat<(v4f64 (X86VPermilpdy VR256:$src1, (i8 imm:$imm))),
+ (VPERMILPDYri VR256:$src1, imm:$imm)>;
+def : Pat<(v8i32 (X86VPermilpsy VR256:$src1, (i8 imm:$imm))),
+ (VPERMILPSYri VR256:$src1, imm:$imm)>;
+def : Pat<(v4i64 (X86VPermilpdy VR256:$src1, (i8 imm:$imm))),
+ (VPERMILPDYri VR256:$src1, imm:$imm)>;
+
+//===----------------------------------------------------------------------===//
+// VPERM2F128 - Permute Floating-Point Values in 128-bit chunks
+//
def VPERM2F128rr : AVXAIi8<0x06, MRMSrcReg, (outs VR256:$dst),
(ins VR256:$src1, VR256:$src2, i8imm:$src3),
"vperm2f128\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
@@ -5413,65 +6724,6 @@ def VPERM2F128rm : AVXAIi8<0x06, MRMSrcMem, (outs VR256:$dst),
"vperm2f128\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
[]>, VEX_4V;
-// Zero All YMM registers
-def VZEROALL : I<0x77, RawFrm, (outs), (ins), "vzeroall",
- [(int_x86_avx_vzeroall)]>, VEX, VEX_L, Requires<[HasAVX]>;
-
-// Zero Upper bits of YMM registers
-def VZEROUPPER : I<0x77, RawFrm, (outs), (ins), "vzeroupper",
- [(int_x86_avx_vzeroupper)]>, VEX, Requires<[HasAVX]>;
-
-def : Pat<(int_x86_avx_vinsertf128_pd_256 VR256:$src1, VR128:$src2, imm:$src3),
- (VINSERTF128rr VR256:$src1, VR128:$src2, imm:$src3)>;
-def : Pat<(int_x86_avx_vinsertf128_ps_256 VR256:$src1, VR128:$src2, imm:$src3),
- (VINSERTF128rr VR256:$src1, VR128:$src2, imm:$src3)>;
-def : Pat<(int_x86_avx_vinsertf128_si_256 VR256:$src1, VR128:$src2, imm:$src3),
- (VINSERTF128rr VR256:$src1, VR128:$src2, imm:$src3)>;
-
-def : Pat<(vinsertf128_insert:$ins (v8f32 VR256:$src1), (v4f32 VR128:$src2),
- (i32 imm)),
- (VINSERTF128rr VR256:$src1, VR128:$src2,
- (INSERT_get_vinsertf128_imm VR256:$ins))>;
-def : Pat<(vinsertf128_insert:$ins (v4f64 VR256:$src1), (v2f64 VR128:$src2),
- (i32 imm)),
- (VINSERTF128rr VR256:$src1, VR128:$src2,
- (INSERT_get_vinsertf128_imm VR256:$ins))>;
-def : Pat<(vinsertf128_insert:$ins (v8i32 VR256:$src1), (v4i32 VR128:$src2),
- (i32 imm)),
- (VINSERTF128rr VR256:$src1, VR128:$src2,
- (INSERT_get_vinsertf128_imm VR256:$ins))>;
-def : Pat<(vinsertf128_insert:$ins (v4i64 VR256:$src1), (v2i64 VR128:$src2),
- (i32 imm)),
- (VINSERTF128rr VR256:$src1, VR128:$src2,
- (INSERT_get_vinsertf128_imm VR256:$ins))>;
-
-def : Pat<(int_x86_avx_vextractf128_pd_256 VR256:$src1, imm:$src2),
- (VEXTRACTF128rr VR256:$src1, imm:$src2)>;
-def : Pat<(int_x86_avx_vextractf128_ps_256 VR256:$src1, imm:$src2),
- (VEXTRACTF128rr VR256:$src1, imm:$src2)>;
-def : Pat<(int_x86_avx_vextractf128_si_256 VR256:$src1, imm:$src2),
- (VEXTRACTF128rr VR256:$src1, imm:$src2)>;
-
-def : Pat<(vextractf128_extract:$ext VR256:$src1, (i32 imm)),
- (v4f32 (VEXTRACTF128rr
- (v8f32 VR256:$src1),
- (EXTRACT_get_vextractf128_imm VR128:$ext)))>;
-def : Pat<(vextractf128_extract:$ext VR256:$src1, (i32 imm)),
- (v2f64 (VEXTRACTF128rr
- (v4f64 VR256:$src1),
- (EXTRACT_get_vextractf128_imm VR128:$ext)))>;
-def : Pat<(vextractf128_extract:$ext VR256:$src1, (i32 imm)),
- (v4i32 (VEXTRACTF128rr
- (v8i32 VR256:$src1),
- (EXTRACT_get_vextractf128_imm VR128:$ext)))>;
-def : Pat<(vextractf128_extract:$ext VR256:$src1, (i32 imm)),
- (v2i64 (VEXTRACTF128rr
- (v4i64 VR256:$src1),
- (EXTRACT_get_vextractf128_imm VR128:$ext)))>;
-
-def : Pat<(int_x86_avx_vbroadcastf128_ps_256 addr:$src),
- (VBROADCASTF128 addr:$src)>;
-
def : Pat<(int_x86_avx_vperm2f128_ps_256 VR256:$src1, VR256:$src2, imm:$src3),
(VPERM2F128rr VR256:$src1, VR256:$src2, imm:$src3)>;
def : Pat<(int_x86_avx_vperm2f128_pd_256 VR256:$src1, VR256:$src2, imm:$src3),
@@ -5489,377 +6741,59 @@ def : Pat<(int_x86_avx_vperm2f128_si_256
VR256:$src1, (memopv8i32 addr:$src2), imm:$src3),
(VPERM2F128rm VR256:$src1, addr:$src2, imm:$src3)>;
+def : Pat<(v8f32 (X86VPerm2f128 VR256:$src1, VR256:$src2, (i8 imm:$imm))),
+ (VPERM2F128rr VR256:$src1, VR256:$src2, imm:$imm)>;
+def : Pat<(v8i32 (X86VPerm2f128 VR256:$src1, VR256:$src2, (i8 imm:$imm))),
+ (VPERM2F128rr VR256:$src1, VR256:$src2, imm:$imm)>;
+def : Pat<(v4i64 (X86VPerm2f128 VR256:$src1, VR256:$src2, (i8 imm:$imm))),
+ (VPERM2F128rr VR256:$src1, VR256:$src2, imm:$imm)>;
+def : Pat<(v4f64 (X86VPerm2f128 VR256:$src1, VR256:$src2, (i8 imm:$imm))),
+ (VPERM2F128rr VR256:$src1, VR256:$src2, imm:$imm)>;
+def : Pat<(v32i8 (X86VPerm2f128 VR256:$src1, VR256:$src2, (i8 imm:$imm))),
+ (VPERM2F128rr VR256:$src1, VR256:$src2, imm:$imm)>;
+def : Pat<(v16i16 (X86VPerm2f128 VR256:$src1, VR256:$src2, (i8 imm:$imm))),
+ (VPERM2F128rr VR256:$src1, VR256:$src2, imm:$imm)>;
+
//===----------------------------------------------------------------------===//
-// SSE Shuffle pattern fragments
-//===----------------------------------------------------------------------===//
+// VZERO - Zero YMM registers
+//
+let Defs = [YMM0, YMM1, YMM2, YMM3, YMM4, YMM5, YMM6, YMM7,
+ YMM8, YMM9, YMM10, YMM11, YMM12, YMM13, YMM14, YMM15] in {
+ // Zero All YMM registers
+ def VZEROALL : I<0x77, RawFrm, (outs), (ins), "vzeroall",
+ [(int_x86_avx_vzeroall)]>, TB, VEX, VEX_L, Requires<[HasAVX]>;
-// This is part of a "work in progress" refactoring. The idea is that all
-// vector shuffles are going to be translated into target specific nodes and
-// directly matched by the patterns below (which can be changed along the way)
-// The AVX version of some but not all of them are described here, and more
-// should come in a near future.
-
-// Shuffle with PSHUFD instruction folding loads. The first two patterns match
-// SSE2 loads, which are always promoted to v2i64. The last one should match
-// the SSE1 case, where the only legal load is v4f32, but there is no PSHUFD
-// in SSE2, how does it ever worked? Anyway, the pattern will remain here until
-// we investigate further.
-def : Pat<(v4i32 (X86PShufd (bc_v4i32 (memopv2i64 addr:$src1)),
- (i8 imm:$imm))),
- (VPSHUFDmi addr:$src1, imm:$imm)>, Requires<[HasAVX]>;
-def : Pat<(v4i32 (X86PShufd (bc_v4i32 (memopv2i64 addr:$src1)),
- (i8 imm:$imm))),
- (PSHUFDmi addr:$src1, imm:$imm)>;
-def : Pat<(v4i32 (X86PShufd (bc_v4i32 (memopv4f32 addr:$src1)),
- (i8 imm:$imm))),
- (PSHUFDmi addr:$src1, imm:$imm)>; // FIXME: has this ever worked?
-
-// Shuffle with PSHUFD instruction.
-def : Pat<(v4f32 (X86PShufd VR128:$src1, (i8 imm:$imm))),
- (VPSHUFDri VR128:$src1, imm:$imm)>, Requires<[HasAVX]>;
-def : Pat<(v4f32 (X86PShufd VR128:$src1, (i8 imm:$imm))),
- (PSHUFDri VR128:$src1, imm:$imm)>;
-
-def : Pat<(v4i32 (X86PShufd VR128:$src1, (i8 imm:$imm))),
- (VPSHUFDri VR128:$src1, imm:$imm)>, Requires<[HasAVX]>;
-def : Pat<(v4i32 (X86PShufd VR128:$src1, (i8 imm:$imm))),
- (PSHUFDri VR128:$src1, imm:$imm)>;
-
-// Shuffle with SHUFPD instruction.
-def : Pat<(v2f64 (X86Shufps VR128:$src1,
- (memopv2f64 addr:$src2), (i8 imm:$imm))),
- (VSHUFPDrmi VR128:$src1, addr:$src2, imm:$imm)>, Requires<[HasAVX]>;
-def : Pat<(v2f64 (X86Shufps VR128:$src1,
- (memopv2f64 addr:$src2), (i8 imm:$imm))),
- (SHUFPDrmi VR128:$src1, addr:$src2, imm:$imm)>;
-
-def : Pat<(v2i64 (X86Shufpd VR128:$src1, VR128:$src2, (i8 imm:$imm))),
- (VSHUFPDrri VR128:$src1, VR128:$src2, imm:$imm)>, Requires<[HasAVX]>;
-def : Pat<(v2i64 (X86Shufpd VR128:$src1, VR128:$src2, (i8 imm:$imm))),
- (SHUFPDrri VR128:$src1, VR128:$src2, imm:$imm)>;
-
-def : Pat<(v2f64 (X86Shufpd VR128:$src1, VR128:$src2, (i8 imm:$imm))),
- (VSHUFPDrri VR128:$src1, VR128:$src2, imm:$imm)>, Requires<[HasAVX]>;
-def : Pat<(v2f64 (X86Shufpd VR128:$src1, VR128:$src2, (i8 imm:$imm))),
- (SHUFPDrri VR128:$src1, VR128:$src2, imm:$imm)>;
-
-// Shuffle with SHUFPS instruction.
-def : Pat<(v4f32 (X86Shufps VR128:$src1,
- (memopv4f32 addr:$src2), (i8 imm:$imm))),
- (VSHUFPSrmi VR128:$src1, addr:$src2, imm:$imm)>, Requires<[HasAVX]>;
-def : Pat<(v4f32 (X86Shufps VR128:$src1,
- (memopv4f32 addr:$src2), (i8 imm:$imm))),
- (SHUFPSrmi VR128:$src1, addr:$src2, imm:$imm)>;
-
-def : Pat<(v4f32 (X86Shufps VR128:$src1, VR128:$src2, (i8 imm:$imm))),
- (VSHUFPSrri VR128:$src1, VR128:$src2, imm:$imm)>, Requires<[HasAVX]>;
-def : Pat<(v4f32 (X86Shufps VR128:$src1, VR128:$src2, (i8 imm:$imm))),
- (SHUFPSrri VR128:$src1, VR128:$src2, imm:$imm)>;
-
-def : Pat<(v4i32 (X86Shufps VR128:$src1,
- (bc_v4i32 (memopv2i64 addr:$src2)), (i8 imm:$imm))),
- (VSHUFPSrmi VR128:$src1, addr:$src2, imm:$imm)>, Requires<[HasAVX]>;
-def : Pat<(v4i32 (X86Shufps VR128:$src1,
- (bc_v4i32 (memopv2i64 addr:$src2)), (i8 imm:$imm))),
- (SHUFPSrmi VR128:$src1, addr:$src2, imm:$imm)>;
-
-def : Pat<(v4i32 (X86Shufps VR128:$src1, VR128:$src2, (i8 imm:$imm))),
- (VSHUFPSrri VR128:$src1, VR128:$src2, imm:$imm)>, Requires<[HasAVX]>;
-def : Pat<(v4i32 (X86Shufps VR128:$src1, VR128:$src2, (i8 imm:$imm))),
- (SHUFPSrri VR128:$src1, VR128:$src2, imm:$imm)>;
-
-// Shuffle with MOVHLPS instruction
-def : Pat<(v4f32 (X86Movhlps VR128:$src1, VR128:$src2)),
- (MOVHLPSrr VR128:$src1, VR128:$src2)>;
-def : Pat<(v4i32 (X86Movhlps VR128:$src1, VR128:$src2)),
- (MOVHLPSrr VR128:$src1, VR128:$src2)>;
-
-// Shuffle with MOVDDUP instruction
-def : Pat<(X86Movddup (memopv2f64 addr:$src)),
- (VMOVDDUPrm addr:$src)>, Requires<[HasAVX]>;
-def : Pat<(X86Movddup (memopv2f64 addr:$src)),
- (MOVDDUPrm addr:$src)>;
-
-def : Pat<(X86Movddup (bc_v2f64 (memopv4f32 addr:$src))),
- (VMOVDDUPrm addr:$src)>, Requires<[HasAVX]>;
-def : Pat<(X86Movddup (bc_v2f64 (memopv4f32 addr:$src))),
- (MOVDDUPrm addr:$src)>;
-
-def : Pat<(X86Movddup (bc_v2f64 (memopv2i64 addr:$src))),
- (VMOVDDUPrm addr:$src)>, Requires<[HasAVX]>;
-def : Pat<(X86Movddup (bc_v2f64 (memopv2i64 addr:$src))),
- (MOVDDUPrm addr:$src)>;
-
-def : Pat<(X86Movddup (v2f64 (scalar_to_vector (loadf64 addr:$src)))),
- (VMOVDDUPrm addr:$src)>, Requires<[HasAVX]>;
-def : Pat<(X86Movddup (v2f64 (scalar_to_vector (loadf64 addr:$src)))),
- (MOVDDUPrm addr:$src)>;
-
-def : Pat<(X86Movddup (bc_v2f64
- (v2i64 (scalar_to_vector (loadi64 addr:$src))))),
- (VMOVDDUPrm addr:$src)>, Requires<[HasAVX]>;
-def : Pat<(X86Movddup (bc_v2f64
- (v2i64 (scalar_to_vector (loadi64 addr:$src))))),
- (MOVDDUPrm addr:$src)>;
-
-
-// Shuffle with UNPCKLPS
-def : Pat<(v4f32 (X86Unpcklps VR128:$src1, (memopv4f32 addr:$src2))),
- (VUNPCKLPSrm VR128:$src1, addr:$src2)>, Requires<[HasAVX]>;
-def : Pat<(v8f32 (X86Unpcklpsy VR256:$src1, (memopv8f32 addr:$src2))),
- (VUNPCKLPSYrm VR256:$src1, addr:$src2)>, Requires<[HasAVX]>;
-def : Pat<(v4f32 (X86Unpcklps VR128:$src1, (memopv4f32 addr:$src2))),
- (UNPCKLPSrm VR128:$src1, addr:$src2)>;
-
-def : Pat<(v4f32 (X86Unpcklps VR128:$src1, VR128:$src2)),
- (VUNPCKLPSrr VR128:$src1, VR128:$src2)>, Requires<[HasAVX]>;
-def : Pat<(v8f32 (X86Unpcklpsy VR256:$src1, VR256:$src2)),
- (VUNPCKLPSYrr VR256:$src1, VR256:$src2)>, Requires<[HasAVX]>;
-def : Pat<(v4f32 (X86Unpcklps VR128:$src1, VR128:$src2)),
- (UNPCKLPSrr VR128:$src1, VR128:$src2)>;
-
-// Shuffle with UNPCKHPS
-def : Pat<(v4f32 (X86Unpckhps VR128:$src1, (memopv4f32 addr:$src2))),
- (VUNPCKHPSrm VR128:$src1, addr:$src2)>, Requires<[HasAVX]>;
-def : Pat<(v4f32 (X86Unpckhps VR128:$src1, (memopv4f32 addr:$src2))),
- (UNPCKHPSrm VR128:$src1, addr:$src2)>;
-
-def : Pat<(v4f32 (X86Unpckhps VR128:$src1, VR128:$src2)),
- (VUNPCKHPSrr VR128:$src1, VR128:$src2)>, Requires<[HasAVX]>;
-def : Pat<(v4f32 (X86Unpckhps VR128:$src1, VR128:$src2)),
- (UNPCKHPSrr VR128:$src1, VR128:$src2)>;
-
-// Shuffle with UNPCKLPD
-def : Pat<(v2f64 (X86Unpcklpd VR128:$src1, (memopv2f64 addr:$src2))),
- (VUNPCKLPDrm VR128:$src1, addr:$src2)>, Requires<[HasAVX]>;
-def : Pat<(v4f64 (X86Unpcklpdy VR256:$src1, (memopv4f64 addr:$src2))),
- (VUNPCKLPDYrm VR256:$src1, addr:$src2)>, Requires<[HasAVX]>;
-def : Pat<(v2f64 (X86Unpcklpd VR128:$src1, (memopv2f64 addr:$src2))),
- (UNPCKLPDrm VR128:$src1, addr:$src2)>;
-
-def : Pat<(v2f64 (X86Unpcklpd VR128:$src1, VR128:$src2)),
- (VUNPCKLPDrr VR128:$src1, VR128:$src2)>, Requires<[HasAVX]>;
-def : Pat<(v4f64 (X86Unpcklpdy VR256:$src1, VR256:$src2)),
- (VUNPCKLPDYrr VR256:$src1, VR256:$src2)>, Requires<[HasAVX]>;
-def : Pat<(v2f64 (X86Unpcklpd VR128:$src1, VR128:$src2)),
- (UNPCKLPDrr VR128:$src1, VR128:$src2)>;
-
-// Shuffle with UNPCKHPD
-def : Pat<(v2f64 (X86Unpckhpd VR128:$src1, (memopv2f64 addr:$src2))),
- (VUNPCKHPDrm VR128:$src1, addr:$src2)>, Requires<[HasAVX]>;
-def : Pat<(v2f64 (X86Unpckhpd VR128:$src1, (memopv2f64 addr:$src2))),
- (UNPCKHPDrm VR128:$src1, addr:$src2)>;
-
-def : Pat<(v2f64 (X86Unpckhpd VR128:$src1, VR128:$src2)),
- (VUNPCKHPDrr VR128:$src1, VR128:$src2)>, Requires<[HasAVX]>;
-def : Pat<(v2f64 (X86Unpckhpd VR128:$src1, VR128:$src2)),
- (UNPCKHPDrr VR128:$src1, VR128:$src2)>;
-
-// Shuffle with PUNPCKLBW
-def : Pat<(v16i8 (X86Punpcklbw VR128:$src1,
- (bc_v16i8 (memopv2i64 addr:$src2)))),
- (PUNPCKLBWrm VR128:$src1, addr:$src2)>;
-def : Pat<(v16i8 (X86Punpcklbw VR128:$src1, VR128:$src2)),
- (PUNPCKLBWrr VR128:$src1, VR128:$src2)>;
-
-// Shuffle with PUNPCKLWD
-def : Pat<(v8i16 (X86Punpcklwd VR128:$src1,
- (bc_v8i16 (memopv2i64 addr:$src2)))),
- (PUNPCKLWDrm VR128:$src1, addr:$src2)>;
-def : Pat<(v8i16 (X86Punpcklwd VR128:$src1, VR128:$src2)),
- (PUNPCKLWDrr VR128:$src1, VR128:$src2)>;
-
-// Shuffle with PUNPCKLDQ
-def : Pat<(v4i32 (X86Punpckldq VR128:$src1,
- (bc_v4i32 (memopv2i64 addr:$src2)))),
- (PUNPCKLDQrm VR128:$src1, addr:$src2)>;
-def : Pat<(v4i32 (X86Punpckldq VR128:$src1, VR128:$src2)),
- (PUNPCKLDQrr VR128:$src1, VR128:$src2)>;
-
-// Shuffle with PUNPCKLQDQ
-def : Pat<(v2i64 (X86Punpcklqdq VR128:$src1, (memopv2i64 addr:$src2))),
- (PUNPCKLQDQrm VR128:$src1, addr:$src2)>;
-def : Pat<(v2i64 (X86Punpcklqdq VR128:$src1, VR128:$src2)),
- (PUNPCKLQDQrr VR128:$src1, VR128:$src2)>;
-
-// Shuffle with PUNPCKHBW
-def : Pat<(v16i8 (X86Punpckhbw VR128:$src1,
- (bc_v16i8 (memopv2i64 addr:$src2)))),
- (PUNPCKHBWrm VR128:$src1, addr:$src2)>;
-def : Pat<(v16i8 (X86Punpckhbw VR128:$src1, VR128:$src2)),
- (PUNPCKHBWrr VR128:$src1, VR128:$src2)>;
-
-// Shuffle with PUNPCKHWD
-def : Pat<(v8i16 (X86Punpckhwd VR128:$src1,
- (bc_v8i16 (memopv2i64 addr:$src2)))),
- (PUNPCKHWDrm VR128:$src1, addr:$src2)>;
-def : Pat<(v8i16 (X86Punpckhwd VR128:$src1, VR128:$src2)),
- (PUNPCKHWDrr VR128:$src1, VR128:$src2)>;
-
-// Shuffle with PUNPCKHDQ
-def : Pat<(v4i32 (X86Punpckhdq VR128:$src1,
- (bc_v4i32 (memopv2i64 addr:$src2)))),
- (PUNPCKHDQrm VR128:$src1, addr:$src2)>;
-def : Pat<(v4i32 (X86Punpckhdq VR128:$src1, VR128:$src2)),
- (PUNPCKHDQrr VR128:$src1, VR128:$src2)>;
-
-// Shuffle with PUNPCKHQDQ
-def : Pat<(v2i64 (X86Punpckhqdq VR128:$src1, (memopv2i64 addr:$src2))),
- (PUNPCKHQDQrm VR128:$src1, addr:$src2)>;
-def : Pat<(v2i64 (X86Punpckhqdq VR128:$src1, VR128:$src2)),
- (PUNPCKHQDQrr VR128:$src1, VR128:$src2)>;
-
-// Shuffle with MOVLHPS
-def : Pat<(X86Movlhps VR128:$src1,
- (bc_v4f32 (v2f64 (scalar_to_vector (loadf64 addr:$src2))))),
- (MOVHPSrm VR128:$src1, addr:$src2)>;
-def : Pat<(X86Movlhps VR128:$src1,
- (bc_v4i32 (v2i64 (X86vzload addr:$src2)))),
- (MOVHPSrm VR128:$src1, addr:$src2)>;
-def : Pat<(v4f32 (X86Movlhps VR128:$src1, VR128:$src2)),
- (MOVLHPSrr VR128:$src1, VR128:$src2)>;
-def : Pat<(v4i32 (X86Movlhps VR128:$src1, VR128:$src2)),
- (MOVLHPSrr VR128:$src1, VR128:$src2)>;
-def : Pat<(v2i64 (X86Movlhps VR128:$src1, VR128:$src2)),
- (MOVLHPSrr (v2i64 VR128:$src1), VR128:$src2)>;
-
-// FIXME: Instead of X86Movddup, there should be a X86Unpcklpd here, the problem
-// is during lowering, where it's not possible to recognize the load fold cause
-// it has two uses through a bitcast. One use disappears at isel time and the
-// fold opportunity reappears.
-def : Pat<(v2f64 (X86Movddup VR128:$src)),
- (UNPCKLPDrr VR128:$src, VR128:$src)>;
-
-// Shuffle with MOVLHPD
-def : Pat<(v2f64 (X86Movlhpd VR128:$src1,
- (scalar_to_vector (loadf64 addr:$src2)))),
- (MOVHPDrm VR128:$src1, addr:$src2)>;
-
-// FIXME: Instead of X86Unpcklpd, there should be a X86Movlhpd here, the problem
-// is during lowering, where it's not possible to recognize the load fold cause
-// it has two uses through a bitcast. One use disappears at isel time and the
-// fold opportunity reappears.
-def : Pat<(v2f64 (X86Unpcklpd VR128:$src1,
- (scalar_to_vector (loadf64 addr:$src2)))),
- (MOVHPDrm VR128:$src1, addr:$src2)>;
-
-// Shuffle with MOVSS
-def : Pat<(v4f32 (X86Movss VR128:$src1, (scalar_to_vector FR32:$src2))),
- (MOVSSrr VR128:$src1, FR32:$src2)>;
-def : Pat<(v4i32 (X86Movss VR128:$src1, VR128:$src2)),
- (MOVSSrr (v4i32 VR128:$src1),
- (EXTRACT_SUBREG (v4i32 VR128:$src2), sub_ss))>;
-def : Pat<(v4f32 (X86Movss VR128:$src1, VR128:$src2)),
- (MOVSSrr (v4f32 VR128:$src1),
- (EXTRACT_SUBREG (v4f32 VR128:$src2), sub_ss))>;
-// FIXME: Instead of a X86Movss there should be a X86Movlps here, the problem
-// is during lowering, where it's not possible to recognize the load fold cause
-// it has two uses through a bitcast. One use disappears at isel time and the
-// fold opportunity reappears.
-def : Pat<(X86Movss VR128:$src1,
- (bc_v4i32 (v2i64 (load addr:$src2)))),
- (MOVLPSrm VR128:$src1, addr:$src2)>;
-
-// Shuffle with MOVSD
-def : Pat<(v2f64 (X86Movsd VR128:$src1, (scalar_to_vector FR64:$src2))),
- (MOVSDrr VR128:$src1, FR64:$src2)>;
-def : Pat<(v2i64 (X86Movsd VR128:$src1, VR128:$src2)),
- (MOVSDrr (v2i64 VR128:$src1),
- (EXTRACT_SUBREG (v2i64 VR128:$src2), sub_sd))>;
-def : Pat<(v2f64 (X86Movsd VR128:$src1, VR128:$src2)),
- (MOVSDrr (v2f64 VR128:$src1),
- (EXTRACT_SUBREG (v2f64 VR128:$src2), sub_sd))>;
-def : Pat<(v4f32 (X86Movsd VR128:$src1, VR128:$src2)),
- (MOVSDrr VR128:$src1, (EXTRACT_SUBREG (v4f32 VR128:$src2), sub_sd))>;
-def : Pat<(v4i32 (X86Movsd VR128:$src1, VR128:$src2)),
- (MOVSDrr VR128:$src1, (EXTRACT_SUBREG (v4i32 VR128:$src2), sub_sd))>;
-
-// Shuffle with MOVSHDUP
-def : Pat<(v4i32 (X86Movshdup VR128:$src)),
- (MOVSHDUPrr VR128:$src)>;
-def : Pat<(X86Movshdup (bc_v4i32 (memopv2i64 addr:$src))),
- (MOVSHDUPrm addr:$src)>;
-
-def : Pat<(v4f32 (X86Movshdup VR128:$src)),
- (MOVSHDUPrr VR128:$src)>;
-def : Pat<(X86Movshdup (memopv4f32 addr:$src)),
- (MOVSHDUPrm addr:$src)>;
-
-// Shuffle with MOVSLDUP
-def : Pat<(v4i32 (X86Movsldup VR128:$src)),
- (MOVSLDUPrr VR128:$src)>;
-def : Pat<(X86Movsldup (bc_v4i32 (memopv2i64 addr:$src))),
- (MOVSLDUPrm addr:$src)>;
-
-def : Pat<(v4f32 (X86Movsldup VR128:$src)),
- (MOVSLDUPrr VR128:$src)>;
-def : Pat<(X86Movsldup (memopv4f32 addr:$src)),
- (MOVSLDUPrm addr:$src)>;
-
-// Shuffle with PSHUFHW
-def : Pat<(v8i16 (X86PShufhw VR128:$src, (i8 imm:$imm))),
- (PSHUFHWri VR128:$src, imm:$imm)>;
-def : Pat<(v8i16 (X86PShufhw (bc_v8i16 (memopv2i64 addr:$src)), (i8 imm:$imm))),
- (PSHUFHWmi addr:$src, imm:$imm)>;
-
-// Shuffle with PSHUFLW
-def : Pat<(v8i16 (X86PShuflw VR128:$src, (i8 imm:$imm))),
- (PSHUFLWri VR128:$src, imm:$imm)>;
-def : Pat<(v8i16 (X86PShuflw (bc_v8i16 (memopv2i64 addr:$src)), (i8 imm:$imm))),
- (PSHUFLWmi addr:$src, imm:$imm)>;
-
-// Shuffle with PALIGN
-def : Pat<(v4i32 (X86PAlign VR128:$src1, VR128:$src2, (i8 imm:$imm))),
- (PALIGNR128rr VR128:$src2, VR128:$src1, imm:$imm)>;
-def : Pat<(v4f32 (X86PAlign VR128:$src1, VR128:$src2, (i8 imm:$imm))),
- (PALIGNR128rr VR128:$src2, VR128:$src1, imm:$imm)>;
-def : Pat<(v8i16 (X86PAlign VR128:$src1, VR128:$src2, (i8 imm:$imm))),
- (PALIGNR128rr VR128:$src2, VR128:$src1, imm:$imm)>;
-def : Pat<(v16i8 (X86PAlign VR128:$src1, VR128:$src2, (i8 imm:$imm))),
- (PALIGNR128rr VR128:$src2, VR128:$src1, imm:$imm)>;
+ // Zero Upper bits of YMM registers
+ def VZEROUPPER : I<0x77, RawFrm, (outs), (ins), "vzeroupper",
+ [(int_x86_avx_vzeroupper)]>, TB, VEX, Requires<[HasAVX]>;
+}
-// Shuffle with MOVLPS
-def : Pat<(v4f32 (X86Movlps VR128:$src1, (load addr:$src2))),
- (MOVLPSrm VR128:$src1, addr:$src2)>;
-def : Pat<(v4i32 (X86Movlps VR128:$src1, (load addr:$src2))),
- (MOVLPSrm VR128:$src1, addr:$src2)>;
-def : Pat<(X86Movlps VR128:$src1,
- (bc_v4f32 (v2f64 (scalar_to_vector (loadf64 addr:$src2))))),
- (MOVLPSrm VR128:$src1, addr:$src2)>;
-// FIXME: Instead of a X86Movlps there should be a X86Movsd here, the problem
-// is during lowering, where it's not possible to recognize the load fold cause
-// it has two uses through a bitcast. One use disappears at isel time and the
-// fold opportunity reappears.
-def : Pat<(v4f32 (X86Movlps VR128:$src1, VR128:$src2)),
- (MOVSDrr VR128:$src1, (EXTRACT_SUBREG (v4f32 VR128:$src2), sub_sd))>;
-
-def : Pat<(v4i32 (X86Movlps VR128:$src1, VR128:$src2)),
- (MOVSDrr VR128:$src1, (EXTRACT_SUBREG (v4i32 VR128:$src2), sub_sd))>;
-
-// Shuffle with MOVLPD
-def : Pat<(v2f64 (X86Movlpd VR128:$src1, (load addr:$src2))),
- (MOVLPDrm VR128:$src1, addr:$src2)>;
-def : Pat<(v2i64 (X86Movlpd VR128:$src1, (load addr:$src2))),
- (MOVLPDrm VR128:$src1, addr:$src2)>;
-def : Pat<(v2f64 (X86Movlpd VR128:$src1,
- (scalar_to_vector (loadf64 addr:$src2)))),
- (MOVLPDrm VR128:$src1, addr:$src2)>;
-
-// Extra patterns to match stores with MOVHPS/PD and MOVLPS/PD
-def : Pat<(store (f64 (vector_extract
- (v2f64 (X86Unpckhps VR128:$src, (undef))), (iPTR 0))),addr:$dst),
- (MOVHPSmr addr:$dst, VR128:$src)>;
-def : Pat<(store (f64 (vector_extract
- (v2f64 (X86Unpckhpd VR128:$src, (undef))), (iPTR 0))),addr:$dst),
- (MOVHPDmr addr:$dst, VR128:$src)>;
-
-def : Pat<(store (v4f32 (X86Movlps (load addr:$src1), VR128:$src2)),addr:$src1),
- (MOVLPSmr addr:$src1, VR128:$src2)>;
-def : Pat<(store (v4i32 (X86Movlps
- (bc_v4i32 (loadv2i64 addr:$src1)), VR128:$src2)), addr:$src1),
- (MOVLPSmr addr:$src1, VR128:$src2)>;
-
-def : Pat<(store (v2f64 (X86Movlpd (load addr:$src1), VR128:$src2)),addr:$src1),
- (MOVLPDmr addr:$src1, VR128:$src2)>;
-def : Pat<(store (v2i64 (X86Movlpd (load addr:$src1), VR128:$src2)),addr:$src1),
- (MOVLPDmr addr:$src1, VR128:$src2)>;
+//===----------------------------------------------------------------------===//
+// Half precision conversion instructions
+//
+let Predicates = [HasAVX, HasF16C] in {
+ def VCVTPH2PSrm : I<0x13, MRMSrcMem, (outs VR128:$dst), (ins f64mem:$src),
+ "vcvtph2ps\t{$src, $dst|$dst, $src}", []>, T8, OpSize, VEX;
+ def VCVTPH2PSrr : I<0x13, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
+ "vcvtph2ps\t{$src, $dst|$dst, $src}", []>, T8, OpSize, VEX;
+ def VCVTPH2PSYrm : I<0x13, MRMSrcMem, (outs VR256:$dst), (ins f128mem:$src),
+ "vcvtph2ps\t{$src, $dst|$dst, $src}", []>, T8, OpSize, VEX;
+ def VCVTPH2PSYrr : I<0x13, MRMSrcReg, (outs VR256:$dst), (ins VR128:$src),
+ "vcvtph2ps\t{$src, $dst|$dst, $src}", []>, T8, OpSize, VEX;
+ def VCVTPS2PHmr : Ii8<0x1D, MRMDestMem, (outs f64mem:$dst),
+ (ins VR128:$src1, i32i8imm:$src2),
+ "vcvtps2ph\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>,
+ TA, OpSize, VEX;
+ def VCVTPS2PHrr : Ii8<0x1D, MRMDestReg, (outs VR128:$dst),
+ (ins VR128:$src1, i32i8imm:$src2),
+ "vcvtps2ph\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>,
+ TA, OpSize, VEX;
+ def VCVTPS2PHYmr : Ii8<0x1D, MRMDestMem, (outs f128mem:$dst),
+ (ins VR256:$src1, i32i8imm:$src2),
+ "vcvtps2ph\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>,
+ TA, OpSize, VEX;
+ def VCVTPS2PHYrr : Ii8<0x1D, MRMDestReg, (outs VR128:$dst),
+ (ins VR256:$src1, i32i8imm:$src2),
+ "vcvtps2ph\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>,
+ TA, OpSize, VEX;
+}
diff --git a/contrib/llvm/lib/Target/X86/X86InstrSystem.td b/contrib/llvm/lib/Target/X86/X86InstrSystem.td
index 31de878..05a5b36 100644
--- a/contrib/llvm/lib/Target/X86/X86InstrSystem.td
+++ b/contrib/llvm/lib/Target/X86/X86InstrSystem.td
@@ -67,43 +67,43 @@ def IRET64 : RI<0xcf, RawFrm, (outs), (ins), "iretq", []>,
//
let Defs = [AL], Uses = [DX] in
def IN8rr : I<0xEC, RawFrm, (outs), (ins),
- "in{b}\t{%dx, %al|%AL, %DX}", []>;
+ "in{b}\t{%dx, %al|AL, DX}", []>;
let Defs = [AX], Uses = [DX] in
def IN16rr : I<0xED, RawFrm, (outs), (ins),
- "in{w}\t{%dx, %ax|%AX, %DX}", []>, OpSize;
+ "in{w}\t{%dx, %ax|AX, DX}", []>, OpSize;
let Defs = [EAX], Uses = [DX] in
def IN32rr : I<0xED, RawFrm, (outs), (ins),
- "in{l}\t{%dx, %eax|%EAX, %DX}", []>;
+ "in{l}\t{%dx, %eax|EAX, DX}", []>;
let Defs = [AL] in
def IN8ri : Ii8<0xE4, RawFrm, (outs), (ins i8imm:$port),
- "in{b}\t{$port, %al|%AL, $port}", []>;
+ "in{b}\t{$port, %al|AL, $port}", []>;
let Defs = [AX] in
def IN16ri : Ii8<0xE5, RawFrm, (outs), (ins i8imm:$port),
- "in{w}\t{$port, %ax|%AX, $port}", []>, OpSize;
+ "in{w}\t{$port, %ax|AX, $port}", []>, OpSize;
let Defs = [EAX] in
def IN32ri : Ii8<0xE5, RawFrm, (outs), (ins i8imm:$port),
- "in{l}\t{$port, %eax|%EAX, $port}", []>;
+ "in{l}\t{$port, %eax|EAX, $port}", []>;
let Uses = [DX, AL] in
def OUT8rr : I<0xEE, RawFrm, (outs), (ins),
- "out{b}\t{%al, %dx|%DX, %AL}", []>;
+ "out{b}\t{%al, %dx|DX, AL}", []>;
let Uses = [DX, AX] in
def OUT16rr : I<0xEF, RawFrm, (outs), (ins),
- "out{w}\t{%ax, %dx|%DX, %AX}", []>, OpSize;
+ "out{w}\t{%ax, %dx|DX, AX}", []>, OpSize;
let Uses = [DX, EAX] in
def OUT32rr : I<0xEF, RawFrm, (outs), (ins),
- "out{l}\t{%eax, %dx|%DX, %EAX}", []>;
+ "out{l}\t{%eax, %dx|DX, EAX}", []>;
let Uses = [AL] in
def OUT8ir : Ii8<0xE6, RawFrm, (outs), (ins i8imm:$port),
- "out{b}\t{%al, $port|$port, %AL}", []>;
+ "out{b}\t{%al, $port|$port, AL}", []>;
let Uses = [AX] in
def OUT16ir : Ii8<0xE7, RawFrm, (outs), (ins i8imm:$port),
- "out{w}\t{%ax, $port|$port, %AX}", []>, OpSize;
+ "out{w}\t{%ax, $port|$port, AX}", []>, OpSize;
let Uses = [EAX] in
def OUT32ir : Ii8<0xE7, RawFrm, (outs), (ins i8imm:$port),
- "out{l}\t{%eax, $port|$port, %EAX}", []>;
+ "out{l}\t{%eax, $port|$port, EAX}", []>;
def IN8 : I<0x6C, RawFrm, (outs), (ins), "ins{b}", []>;
def IN16 : I<0x6D, RawFrm, (outs), (ins), "ins{w}", []>, OpSize;
@@ -229,65 +229,65 @@ def LTRm : I<0x00, MRM3m, (outs), (ins i16mem:$src),
"ltr{w}\t{$src}", []>, TB;
def PUSHCS16 : I<0x0E, RawFrm, (outs), (ins),
- "push{w}\t%cs", []>, Requires<[In32BitMode]>, OpSize;
+ "push{w}\t{%cs|CS}", []>, Requires<[In32BitMode]>, OpSize;
def PUSHCS32 : I<0x0E, RawFrm, (outs), (ins),
- "push{l}\t%cs", []>, Requires<[In32BitMode]>;
+ "push{l}\t{%cs|CS}", []>, Requires<[In32BitMode]>;
def PUSHSS16 : I<0x16, RawFrm, (outs), (ins),
- "push{w}\t%ss", []>, Requires<[In32BitMode]>, OpSize;
+ "push{w}\t{%ss|SS}", []>, Requires<[In32BitMode]>, OpSize;
def PUSHSS32 : I<0x16, RawFrm, (outs), (ins),
- "push{l}\t%ss", []>, Requires<[In32BitMode]>;
+ "push{l}\t{%ss|SS}", []>, Requires<[In32BitMode]>;
def PUSHDS16 : I<0x1E, RawFrm, (outs), (ins),
- "push{w}\t%ds", []>, Requires<[In32BitMode]>, OpSize;
+ "push{w}\t{%ds|DS}", []>, Requires<[In32BitMode]>, OpSize;
def PUSHDS32 : I<0x1E, RawFrm, (outs), (ins),
- "push{l}\t%ds", []>, Requires<[In32BitMode]>;
+ "push{l}\t{%ds|DS}", []>, Requires<[In32BitMode]>;
def PUSHES16 : I<0x06, RawFrm, (outs), (ins),
- "push{w}\t%es", []>, Requires<[In32BitMode]>, OpSize;
+ "push{w}\t{%es|ES}", []>, Requires<[In32BitMode]>, OpSize;
def PUSHES32 : I<0x06, RawFrm, (outs), (ins),
- "push{l}\t%es", []>, Requires<[In32BitMode]>;
+ "push{l}\t{%es|ES}", []>, Requires<[In32BitMode]>;
def PUSHFS16 : I<0xa0, RawFrm, (outs), (ins),
- "push{w}\t%fs", []>, OpSize, TB;
+ "push{w}\t{%fs|FS}", []>, OpSize, TB;
def PUSHFS32 : I<0xa0, RawFrm, (outs), (ins),
- "push{l}\t%fs", []>, TB, Requires<[In32BitMode]>;
+ "push{l}\t{%fs|FS}", []>, TB, Requires<[In32BitMode]>;
def PUSHGS16 : I<0xa8, RawFrm, (outs), (ins),
- "push{w}\t%gs", []>, OpSize, TB;
+ "push{w}\t{%gs|GS}", []>, OpSize, TB;
def PUSHGS32 : I<0xa8, RawFrm, (outs), (ins),
- "push{l}\t%gs", []>, TB, Requires<[In32BitMode]>;
+ "push{l}\t{%gs|GS}", []>, TB, Requires<[In32BitMode]>;
def PUSHFS64 : I<0xa0, RawFrm, (outs), (ins),
- "push{q}\t%fs", []>, TB;
+ "push{q}\t{%fs|FS}", []>, TB;
def PUSHGS64 : I<0xa8, RawFrm, (outs), (ins),
- "push{q}\t%gs", []>, TB;
+ "push{q}\t{%gs|GS}", []>, TB;
// No "pop cs" instruction.
def POPSS16 : I<0x17, RawFrm, (outs), (ins),
- "pop{w}\t%ss", []>, OpSize, Requires<[In32BitMode]>;
+ "pop{w}\t{%ss|SS}", []>, OpSize, Requires<[In32BitMode]>;
def POPSS32 : I<0x17, RawFrm, (outs), (ins),
- "pop{l}\t%ss", []> , Requires<[In32BitMode]>;
+ "pop{l}\t{%ss|SS}", []> , Requires<[In32BitMode]>;
def POPDS16 : I<0x1F, RawFrm, (outs), (ins),
- "pop{w}\t%ds", []>, OpSize, Requires<[In32BitMode]>;
+ "pop{w}\t{%ds|DS}", []>, OpSize, Requires<[In32BitMode]>;
def POPDS32 : I<0x1F, RawFrm, (outs), (ins),
- "pop{l}\t%ds", []> , Requires<[In32BitMode]>;
+ "pop{l}\t{%ds|DS}", []> , Requires<[In32BitMode]>;
def POPES16 : I<0x07, RawFrm, (outs), (ins),
- "pop{w}\t%es", []>, OpSize, Requires<[In32BitMode]>;
+ "pop{w}\t{%es|ES}", []>, OpSize, Requires<[In32BitMode]>;
def POPES32 : I<0x07, RawFrm, (outs), (ins),
- "pop{l}\t%es", []> , Requires<[In32BitMode]>;
+ "pop{l}\t{%es|ES}", []> , Requires<[In32BitMode]>;
def POPFS16 : I<0xa1, RawFrm, (outs), (ins),
- "pop{w}\t%fs", []>, OpSize, TB;
+ "pop{w}\t{%fs|FS}", []>, OpSize, TB;
def POPFS32 : I<0xa1, RawFrm, (outs), (ins),
- "pop{l}\t%fs", []>, TB , Requires<[In32BitMode]>;
+ "pop{l}\t{%fs|FS}", []>, TB , Requires<[In32BitMode]>;
def POPFS64 : I<0xa1, RawFrm, (outs), (ins),
- "pop{q}\t%fs", []>, TB;
+ "pop{q}\t{%fs|FS}", []>, TB;
def POPGS16 : I<0xa9, RawFrm, (outs), (ins),
- "pop{w}\t%gs", []>, OpSize, TB;
+ "pop{w}\t{%gs|GS}", []>, OpSize, TB;
def POPGS32 : I<0xa9, RawFrm, (outs), (ins),
- "pop{l}\t%gs", []>, TB , Requires<[In32BitMode]>;
+ "pop{l}\t{%gs|GS}", []>, TB , Requires<[In32BitMode]>;
def POPGS64 : I<0xa9, RawFrm, (outs), (ins),
- "pop{q}\t%gs", []>, TB;
+ "pop{q}\t{%gs|GS}", []>, TB;
def LDS16rm : I<0xc5, MRMSrcMem, (outs GR16:$dst), (ins opaque32mem:$src),
@@ -400,12 +400,29 @@ def CPUID : I<0xA2, RawFrm, (outs), (ins), "cpuid", []>, TB;
def INVD : I<0x08, RawFrm, (outs), (ins), "invd", []>, TB;
def WBINVD : I<0x09, RawFrm, (outs), (ins), "wbinvd", []>, TB;
+//===----------------------------------------------------------------------===//
+// XSAVE instructions
let Defs = [RDX, RAX], Uses = [RCX] in
def XGETBV : I<0x01, MRM_D0, (outs), (ins), "xgetbv", []>, TB;
let Uses = [RDX, RAX, RCX] in
def XSETBV : I<0x01, MRM_D1, (outs), (ins), "xsetbv", []>, TB;
+let Uses = [RDX, RAX] in {
+ def XSAVE : I<0xAE, MRM4m, (outs opaque512mem:$dst), (ins),
+ "xsave\t$dst", []>, TB;
+ def XSAVE64 : I<0xAE, MRM4m, (outs opaque512mem:$dst), (ins),
+ "xsaveq\t$dst", []>, TB, REX_W, Requires<[In64BitMode]>;
+ def XRSTOR : I<0xAE, MRM5m, (outs), (ins opaque512mem:$dst),
+ "xrstor\t$dst", []>, TB;
+ def XRSTOR64 : I<0xAE, MRM5m, (outs), (ins opaque512mem:$dst),
+ "xrstorq\t$dst", []>, TB, REX_W, Requires<[In64BitMode]>;
+ def XSAVEOPT : I<0xAE, MRM6m, (outs opaque512mem:$dst), (ins),
+ "xsaveopt\t$dst", []>, TB;
+ def XSAVEOPT64 : I<0xAE, MRM6m, (outs opaque512mem:$dst), (ins),
+ "xsaveoptq\t$dst", []>, TB, REX_W, Requires<[In64BitMode]>;
+}
+
//===----------------------------------------------------------------------===//
// VIA PadLock crypto instructions
let Defs = [RAX, RDI], Uses = [RDX, RDI] in
@@ -427,3 +444,24 @@ let Defs = [RAX, RSI, RDI], Uses = [RAX, RSI, RDI] in {
}
let Defs = [RAX, RDX, RSI], Uses = [RAX, RSI] in
def MONTMUL : I<0xc0, RawFrm, (outs), (ins), "montmul", []>, A6;
+
+//===----------------------------------------------------------------------===//
+// FS/GS Base Instructions
+let Predicates = [In64BitMode] in {
+ def RDFSBASE : I<0xAE, MRM0r, (outs GR32:$dst), (ins),
+ "rdfsbase{l}\t$dst", []>, TB, XS;
+ def RDFSBASE64 : RI<0xAE, MRM0r, (outs GR64:$dst), (ins),
+ "rdfsbase{q}\t$dst", []>, TB, XS;
+ def RDGSBASE : I<0xAE, MRM1r, (outs GR32:$dst), (ins),
+ "rdgsbase{l}\t$dst", []>, TB, XS;
+ def RDGSBASE64 : RI<0xAE, MRM1r, (outs GR64:$dst), (ins),
+ "rdgsbase{q}\t$dst", []>, TB, XS;
+ def WRFSBASE : I<0xAE, MRM2r, (outs), (ins GR32:$dst),
+ "wrfsbase{l}\t$dst", []>, TB, XS;
+ def WRFSBASE64 : RI<0xAE, MRM2r, (outs), (ins GR64:$dst),
+ "wrfsbase{q}\t$dst", []>, TB, XS;
+ def WRGSBASE : I<0xAE, MRM3r, (outs), (ins GR32:$dst),
+ "wrgsbase{l}\t$dst", []>, TB, XS;
+ def WRGSBASE64 : RI<0xAE, MRM3r, (outs), (ins GR64:$dst),
+ "wrgsbase{q}\t$dst", []>, TB, XS;
+}
diff --git a/contrib/llvm/lib/Target/X86/X86InstrVMX.td b/contrib/llvm/lib/Target/X86/X86InstrVMX.td
index daf61e4..09a7a7d0c 100644
--- a/contrib/llvm/lib/Target/X86/X86InstrVMX.td
+++ b/contrib/llvm/lib/Target/X86/X86InstrVMX.td
@@ -16,9 +16,15 @@
// VMX instructions
// 66 0F 38 80
-def INVEPT : I<0x80, RawFrm, (outs), (ins), "invept", []>, OpSize, T8;
+def INVEPT32 : I<0x80, MRMSrcMem, (outs), (ins GR32:$src1, i128mem:$src2),
+ "invept {$src2, $src1|$src1, $src2}", []>, OpSize, T8;
+def INVEPT64 : I<0x80, MRMSrcMem, (outs), (ins GR64:$src1, i128mem:$src2),
+ "invept {$src2, $src1|$src1, $src2}", []>, OpSize, T8;
// 66 0F 38 81
-def INVVPID : I<0x81, RawFrm, (outs), (ins), "invvpid", []>, OpSize, T8;
+def INVVPID32 : I<0x81, MRMSrcMem, (outs), (ins GR32:$src1, i128mem:$src2),
+ "invvpid {$src2, $src1|$src1, $src2}", []>, OpSize, T8;
+def INVVPID64 : I<0x81, MRMSrcMem, (outs), (ins GR64:$src1, i128mem:$src2),
+ "invvpid {$src2, $src1|$src1, $src2}", []>, OpSize, T8;
// 0F 01 C1
def VMCALL : I<0x01, MRM_C1, (outs), (ins), "vmcall", []>, TB;
def VMCLEARm : I<0xC7, MRM6m, (outs), (ins i64mem:$vmcs),
diff --git a/contrib/llvm/lib/Target/X86/X86MCInstLower.cpp b/contrib/llvm/lib/Target/X86/X86MCInstLower.cpp
index e385335..50bc14d 100644
--- a/contrib/llvm/lib/Target/X86/X86MCInstLower.cpp
+++ b/contrib/llvm/lib/Target/X86/X86MCInstLower.cpp
@@ -372,15 +372,10 @@ ReSimplify:
case X86::FsFLD0SD: LowerUnaryToTwoAddr(OutMI, X86::PXORrr); break;
case X86::VFsFLD0SS: LowerUnaryToTwoAddr(OutMI, X86::VPXORrr); break;
case X86::VFsFLD0SD: LowerUnaryToTwoAddr(OutMI, X86::VPXORrr); break;
- case X86::V_SET0PS: LowerUnaryToTwoAddr(OutMI, X86::XORPSrr); break;
- case X86::V_SET0PD: LowerUnaryToTwoAddr(OutMI, X86::XORPDrr); break;
- case X86::V_SET0PI: LowerUnaryToTwoAddr(OutMI, X86::PXORrr); break;
case X86::V_SETALLONES: LowerUnaryToTwoAddr(OutMI, X86::PCMPEQDrr); break;
- case X86::AVX_SET0PS: LowerUnaryToTwoAddr(OutMI, X86::VXORPSrr); break;
case X86::AVX_SET0PSY: LowerUnaryToTwoAddr(OutMI, X86::VXORPSYrr); break;
- case X86::AVX_SET0PD: LowerUnaryToTwoAddr(OutMI, X86::VXORPDrr); break;
case X86::AVX_SET0PDY: LowerUnaryToTwoAddr(OutMI, X86::VXORPDYrr); break;
- case X86::AVX_SET0PI: LowerUnaryToTwoAddr(OutMI, X86::VPXORrr); break;
+ case X86::AVX_SETALLONES: LowerUnaryToTwoAddr(OutMI, X86::VPCMPEQDrr); break;
case X86::MOV16r0:
LowerSubReg32_Op0(OutMI, X86::MOV32r0); // MOV16r0 -> MOV32r0
@@ -468,6 +463,18 @@ ReSimplify:
case X86::JLE_4: OutMI.setOpcode(X86::JLE_1); break;
case X86::JG_4: OutMI.setOpcode(X86::JG_1); break;
+ // Atomic load and store require a separate pseudo-inst because Acquire
+ // implies mayStore and Release implies mayLoad; fix these to regular MOV
+ // instructions here
+ case X86::ACQUIRE_MOV8rm: OutMI.setOpcode(X86::MOV8rm); goto ReSimplify;
+ case X86::ACQUIRE_MOV16rm: OutMI.setOpcode(X86::MOV16rm); goto ReSimplify;
+ case X86::ACQUIRE_MOV32rm: OutMI.setOpcode(X86::MOV32rm); goto ReSimplify;
+ case X86::ACQUIRE_MOV64rm: OutMI.setOpcode(X86::MOV64rm); goto ReSimplify;
+ case X86::RELEASE_MOV8mr: OutMI.setOpcode(X86::MOV8mr); goto ReSimplify;
+ case X86::RELEASE_MOV16mr: OutMI.setOpcode(X86::MOV16mr); goto ReSimplify;
+ case X86::RELEASE_MOV32mr: OutMI.setOpcode(X86::MOV32mr); goto ReSimplify;
+ case X86::RELEASE_MOV64mr: OutMI.setOpcode(X86::MOV64mr); goto ReSimplify;
+
// We don't currently select the correct instruction form for instructions
// which have a short %eax, etc. form. Handle this by custom lowering, for
// now.
@@ -585,6 +592,8 @@ static void LowerTlsAddr(MCStreamer &OutStreamer,
}
void X86AsmPrinter::EmitInstruction(const MachineInstr *MI) {
+ OutStreamer.EmitCodeRegion();
+
X86MCInstLower MCInstLowering(Mang, *MF, *this);
switch (MI->getOpcode()) {
case TargetOpcode::DBG_VALUE:
@@ -601,7 +610,7 @@ void X86AsmPrinter::EmitInstruction(const MachineInstr *MI) {
if (OutStreamer.hasRawTextSupport())
OutStreamer.EmitRawText(StringRef("\t#MEMBARRIER"));
return;
-
+
case X86::EH_RETURN:
case X86::EH_RETURN64: {
diff --git a/contrib/llvm/lib/Target/X86/X86MachineFunctionInfo.h b/contrib/llvm/lib/Target/X86/X86MachineFunctionInfo.h
index 06043ec..b0bb313 100644
--- a/contrib/llvm/lib/Target/X86/X86MachineFunctionInfo.h
+++ b/contrib/llvm/lib/Target/X86/X86MachineFunctionInfo.h
@@ -53,10 +53,6 @@ class X86MachineFunctionInfo : public MachineFunctionInfo {
/// relocation models.
unsigned GlobalBaseReg;
- /// ReserveFP - whether the function should reserve the frame pointer
- /// when allocating, even if there may not actually be a frame pointer used.
- bool ReserveFP;
-
/// VarArgsFrameIndex - FrameIndex for start of varargs area.
int VarArgsFrameIndex;
/// RegSaveFrameIndex - X86-64 vararg func register save area.
@@ -65,6 +61,9 @@ class X86MachineFunctionInfo : public MachineFunctionInfo {
unsigned VarArgsGPOffset;
/// VarArgsFPOffset - X86-64 vararg func fp reg offset.
unsigned VarArgsFPOffset;
+ /// ArgumentStackSize - The number of bytes on stack consumed by the arguments
+ /// being passed on the stack.
+ unsigned ArgumentStackSize;
public:
X86MachineFunctionInfo() : ForceFramePointer(false),
@@ -77,7 +76,8 @@ public:
VarArgsFrameIndex(0),
RegSaveFrameIndex(0),
VarArgsGPOffset(0),
- VarArgsFPOffset(0) {}
+ VarArgsFPOffset(0),
+ ArgumentStackSize(0) {}
explicit X86MachineFunctionInfo(MachineFunction &MF)
: ForceFramePointer(false),
@@ -87,11 +87,11 @@ public:
TailCallReturnAddrDelta(0),
SRetReturnReg(0),
GlobalBaseReg(0),
- ReserveFP(false),
VarArgsFrameIndex(0),
RegSaveFrameIndex(0),
VarArgsGPOffset(0),
- VarArgsFPOffset(0) {}
+ VarArgsFPOffset(0),
+ ArgumentStackSize(0) {}
bool getForceFramePointer() const { return ForceFramePointer;}
void setForceFramePointer(bool forceFP) { ForceFramePointer = forceFP; }
@@ -114,9 +114,6 @@ public:
unsigned getGlobalBaseReg() const { return GlobalBaseReg; }
void setGlobalBaseReg(unsigned Reg) { GlobalBaseReg = Reg; }
- bool getReserveFP() const { return ReserveFP; }
- void setReserveFP(bool reserveFP) { ReserveFP = reserveFP; }
-
int getVarArgsFrameIndex() const { return VarArgsFrameIndex; }
void setVarArgsFrameIndex(int Idx) { VarArgsFrameIndex = Idx; }
@@ -128,6 +125,9 @@ public:
unsigned getVarArgsFPOffset() const { return VarArgsFPOffset; }
void setVarArgsFPOffset(unsigned Offset) { VarArgsFPOffset = Offset; }
+
+ unsigned getArgumentStackSize() const { return ArgumentStackSize; }
+ void setArgumentStackSize(unsigned size) { ArgumentStackSize = size; }
};
} // End llvm namespace
diff --git a/contrib/llvm/lib/Target/X86/X86RegisterInfo.cpp b/contrib/llvm/lib/Target/X86/X86RegisterInfo.cpp
index f2faf59..c1ac9f3 100644
--- a/contrib/llvm/lib/Target/X86/X86RegisterInfo.cpp
+++ b/contrib/llvm/lib/Target/X86/X86RegisterInfo.cpp
@@ -27,7 +27,6 @@
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
-#include "llvm/CodeGen/MachineLocation.h"
#include "llvm/CodeGen/MachineModuleInfo.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/MC/MCAsmInfo.h"
@@ -53,7 +52,13 @@ ForceStackAlign("force-align-stack",
X86RegisterInfo::X86RegisterInfo(X86TargetMachine &tm,
const TargetInstrInfo &tii)
- : X86GenRegisterInfo(), TM(tm), TII(tii) {
+ : X86GenRegisterInfo(tm.getSubtarget<X86Subtarget>().is64Bit()
+ ? X86::RIP : X86::EIP,
+ X86_MC::getDwarfRegFlavour(tm.getTargetTriple(), false),
+ X86_MC::getDwarfRegFlavour(tm.getTargetTriple(), true)),
+ TM(tm), TII(tii) {
+ X86_MC::InitLLVM2SEHRegisterMapping(this);
+
// Cache some information.
const X86Subtarget *Subtarget = &TM.getSubtarget<X86Subtarget>();
Is64Bit = Subtarget->is64Bit();
@@ -70,40 +75,6 @@ X86RegisterInfo::X86RegisterInfo(X86TargetMachine &tm,
}
}
-static unsigned getFlavour(const X86Subtarget *Subtarget, bool isEH) {
- if (!Subtarget->is64Bit()) {
- if (Subtarget->isTargetDarwin()) {
- if (isEH)
- return DWARFFlavour::X86_32_DarwinEH;
- else
- return DWARFFlavour::X86_32_Generic;
- } else if (Subtarget->isTargetCygMing()) {
- // Unsupported by now, just quick fallback
- return DWARFFlavour::X86_32_Generic;
- } else {
- return DWARFFlavour::X86_32_Generic;
- }
- }
- return DWARFFlavour::X86_64;
-}
-
-/// getDwarfRegNum - This function maps LLVM register identifiers to the DWARF
-/// specific numbering, used in debug info and exception tables.
-int X86RegisterInfo::getDwarfRegNum(unsigned RegNo, bool isEH) const {
- const X86Subtarget *Subtarget = &TM.getSubtarget<X86Subtarget>();
- unsigned Flavour = getFlavour(Subtarget, isEH);
-
- return X86GenRegisterInfo::getDwarfRegNumFull(RegNo, Flavour);
-}
-
-/// getLLVMRegNum - This function maps DWARF register numbers to LLVM register.
-int X86RegisterInfo::getLLVMRegNum(unsigned DwarfRegNo, bool isEH) const {
- const X86Subtarget *Subtarget = &TM.getSubtarget<X86Subtarget>();
- unsigned Flavour = getFlavour(Subtarget, isEH);
-
- return X86GenRegisterInfo::getLLVMRegNumFull(DwarfRegNo, Flavour);
-}
-
/// getCompactUnwindRegNum - This function maps the register to the number for
/// compact unwind encoding. Return -1 if the register isn't valid.
int X86RegisterInfo::getCompactUnwindRegNum(unsigned RegNum, bool isEH) const {
@@ -121,7 +92,7 @@ int X86RegisterInfo::getCompactUnwindRegNum(unsigned RegNum, bool isEH) const {
int
X86RegisterInfo::getSEHRegNum(unsigned i) const {
- int reg = getX86RegNum(i);
+ int reg = X86_MC::getX86RegNum(i);
switch (i) {
case X86::R8: case X86::R8D: case X86::R8W: case X86::R8B:
case X86::R9: case X86::R9D: case X86::R9W: case X86::R9B:
@@ -140,96 +111,16 @@ X86RegisterInfo::getSEHRegNum(unsigned i) const {
return reg;
}
-/// getX86RegNum - This function maps LLVM register identifiers to their X86
-/// specific numbering, which is used in various places encoding instructions.
-unsigned X86RegisterInfo::getX86RegNum(unsigned RegNo) {
- switch(RegNo) {
- case X86::RAX: case X86::EAX: case X86::AX: case X86::AL: return N86::EAX;
- case X86::RCX: case X86::ECX: case X86::CX: case X86::CL: return N86::ECX;
- case X86::RDX: case X86::EDX: case X86::DX: case X86::DL: return N86::EDX;
- case X86::RBX: case X86::EBX: case X86::BX: case X86::BL: return N86::EBX;
- case X86::RSP: case X86::ESP: case X86::SP: case X86::SPL: case X86::AH:
- return N86::ESP;
- case X86::RBP: case X86::EBP: case X86::BP: case X86::BPL: case X86::CH:
- return N86::EBP;
- case X86::RSI: case X86::ESI: case X86::SI: case X86::SIL: case X86::DH:
- return N86::ESI;
- case X86::RDI: case X86::EDI: case X86::DI: case X86::DIL: case X86::BH:
- return N86::EDI;
-
- case X86::R8: case X86::R8D: case X86::R8W: case X86::R8B:
- return N86::EAX;
- case X86::R9: case X86::R9D: case X86::R9W: case X86::R9B:
- return N86::ECX;
- case X86::R10: case X86::R10D: case X86::R10W: case X86::R10B:
- return N86::EDX;
- case X86::R11: case X86::R11D: case X86::R11W: case X86::R11B:
- return N86::EBX;
- case X86::R12: case X86::R12D: case X86::R12W: case X86::R12B:
- return N86::ESP;
- case X86::R13: case X86::R13D: case X86::R13W: case X86::R13B:
- return N86::EBP;
- case X86::R14: case X86::R14D: case X86::R14W: case X86::R14B:
- return N86::ESI;
- case X86::R15: case X86::R15D: case X86::R15W: case X86::R15B:
- return N86::EDI;
-
- case X86::ST0: case X86::ST1: case X86::ST2: case X86::ST3:
- case X86::ST4: case X86::ST5: case X86::ST6: case X86::ST7:
- return RegNo-X86::ST0;
-
- case X86::XMM0: case X86::XMM8:
- case X86::YMM0: case X86::YMM8: case X86::MM0:
- return 0;
- case X86::XMM1: case X86::XMM9:
- case X86::YMM1: case X86::YMM9: case X86::MM1:
- return 1;
- case X86::XMM2: case X86::XMM10:
- case X86::YMM2: case X86::YMM10: case X86::MM2:
- return 2;
- case X86::XMM3: case X86::XMM11:
- case X86::YMM3: case X86::YMM11: case X86::MM3:
- return 3;
- case X86::XMM4: case X86::XMM12:
- case X86::YMM4: case X86::YMM12: case X86::MM4:
- return 4;
- case X86::XMM5: case X86::XMM13:
- case X86::YMM5: case X86::YMM13: case X86::MM5:
- return 5;
- case X86::XMM6: case X86::XMM14:
- case X86::YMM6: case X86::YMM14: case X86::MM6:
- return 6;
- case X86::XMM7: case X86::XMM15:
- case X86::YMM7: case X86::YMM15: case X86::MM7:
- return 7;
-
- case X86::ES: return 0;
- case X86::CS: return 1;
- case X86::SS: return 2;
- case X86::DS: return 3;
- case X86::FS: return 4;
- case X86::GS: return 5;
-
- case X86::CR0: case X86::CR8 : case X86::DR0: return 0;
- case X86::CR1: case X86::CR9 : case X86::DR1: return 1;
- case X86::CR2: case X86::CR10: case X86::DR2: return 2;
- case X86::CR3: case X86::CR11: case X86::DR3: return 3;
- case X86::CR4: case X86::CR12: case X86::DR4: return 4;
- case X86::CR5: case X86::CR13: case X86::DR5: return 5;
- case X86::CR6: case X86::CR14: case X86::DR6: return 6;
- case X86::CR7: case X86::CR15: case X86::DR7: return 7;
-
- // Pseudo index registers are equivalent to a "none"
- // scaled index (See Intel Manual 2A, table 2-3)
- case X86::EIZ:
- case X86::RIZ:
- return 4;
-
- default:
- assert(isVirtualRegister(RegNo) && "Unknown physical register!");
- llvm_unreachable("Register allocator hasn't allocated reg correctly yet!");
- return 0;
- }
+const TargetRegisterClass *
+X86RegisterInfo::getSubClassWithSubReg(const TargetRegisterClass *RC,
+ unsigned Idx) const {
+ // The sub_8bit sub-register index is more constrained in 32-bit mode.
+ // It behaves just like the sub_8bit_hi index.
+ if (!Is64Bit && Idx == X86::sub_8bit)
+ Idx = X86::sub_8bit_hi;
+
+ // Forward to TableGen's default version.
+ return X86GenRegisterInfo::getSubClassWithSubReg(RC, Idx);
}
const TargetRegisterClass *
@@ -355,8 +246,19 @@ X86RegisterInfo::getMatchingSuperRegClass(const TargetRegisterClass *A,
const TargetRegisterClass*
X86RegisterInfo::getLargestLegalSuperClass(const TargetRegisterClass *RC) const{
+ // Don't allow super-classes of GR8_NOREX. This class is only used after
+ // extrating sub_8bit_hi sub-registers. The H sub-registers cannot be copied
+ // to the full GR8 register class in 64-bit mode, so we cannot allow the
+ // reigster class inflation.
+ //
+ // The GR8_NOREX class is always used in a way that won't be constrained to a
+ // sub-class, so sub-classes like GR8_ABCD_L are allowed to expand to the
+ // full GR8 class.
+ if (RC == X86::GR8_NOREXRegisterClass)
+ return RC;
+
const TargetRegisterClass *Super = RC;
- TargetRegisterClass::sc_iterator I = RC->superclasses_begin();
+ TargetRegisterClass::sc_iterator I = RC->getSuperClasses();
do {
switch (Super->getID()) {
case X86::GR8RegClassID:
@@ -741,11 +643,6 @@ X86RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
}
}
-unsigned X86RegisterInfo::getRARegister() const {
- return Is64Bit ? X86::RIP // Should have dwarf #16.
- : X86::EIP; // Should have dwarf #8.
-}
-
unsigned X86RegisterInfo::getFrameRegister(const MachineFunction &MF) const {
const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering();
return TFI->hasFP(MF) ? FramePtr : StackPtr;
@@ -948,7 +845,7 @@ namespace {
for (unsigned i = 0, e = RI.getNumVirtRegs(); i != e; ++i) {
unsigned Reg = TargetRegisterInfo::index2VirtReg(i);
if (RI.getRegClass(Reg)->getAlignment() > StackAlignment) {
- FuncInfo->setReserveFP(true);
+ FuncInfo->setForceFramePointer(true);
return true;
}
}
diff --git a/contrib/llvm/lib/Target/X86/X86RegisterInfo.h b/contrib/llvm/lib/Target/X86/X86RegisterInfo.h
index a12eb12..7d39c68 100644
--- a/contrib/llvm/lib/Target/X86/X86RegisterInfo.h
+++ b/contrib/llvm/lib/Target/X86/X86RegisterInfo.h
@@ -24,22 +24,6 @@ namespace llvm {
class TargetInstrInfo;
class X86TargetMachine;
-/// N86 namespace - Native X86 register numbers
-///
-namespace N86 {
- enum {
- EAX = 0, ECX = 1, EDX = 2, EBX = 3, ESP = 4, EBP = 5, ESI = 6, EDI = 7
- };
-}
-
-/// DWARFFlavour - Flavour of dwarf regnumbers
-///
-namespace DWARFFlavour {
- enum {
- X86_64 = 0, X86_32_DarwinEH = 1, X86_32_Generic = 2
- };
-}
-
class X86RegisterInfo : public X86GenRegisterInfo {
public:
X86TargetMachine &TM;
@@ -73,11 +57,6 @@ public:
/// register identifier.
static unsigned getX86RegNum(unsigned RegNo);
- /// getDwarfRegNum - allows modification of X86GenRegisterInfo::getDwarfRegNum
- /// (created by TableGen) for target dependencies.
- int getDwarfRegNum(unsigned RegNum, bool isEH) const;
- int getLLVMRegNum(unsigned RegNum, bool isEH) const;
-
// FIXME: This should be tablegen'd like getDwarfRegNum is
int getSEHRegNum(unsigned i) const;
@@ -95,6 +74,9 @@ public:
getMatchingSuperRegClass(const TargetRegisterClass *A,
const TargetRegisterClass *B, unsigned Idx) const;
+ virtual const TargetRegisterClass *
+ getSubClassWithSubReg(const TargetRegisterClass *RC, unsigned Idx) const;
+
const TargetRegisterClass*
getLargestLegalSuperClass(const TargetRegisterClass *RC) const;
@@ -136,7 +118,6 @@ public:
int SPAdj, RegScavenger *RS = NULL) const;
// Debug information queries.
- unsigned getRARegister() const;
unsigned getFrameRegister(const MachineFunction &MF) const;
unsigned getStackRegister() const { return StackPtr; }
// FIXME: Move to FrameInfok
diff --git a/contrib/llvm/lib/Target/X86/X86RegisterInfo.td b/contrib/llvm/lib/Target/X86/X86RegisterInfo.td
index 203722a..9a7db36 100644
--- a/contrib/llvm/lib/Target/X86/X86RegisterInfo.td
+++ b/contrib/llvm/lib/Target/X86/X86RegisterInfo.td
@@ -390,6 +390,13 @@ def GR64_NOREX : RegisterClass<"X86", [i64], 64,
(GR32_NOREX sub_32bit)];
}
+// GR32_NOAX - GR32 registers except EAX. Used by AddRegFrm of XCHG32 in 64-bit
+// mode to prevent encoding using the 0x90 NOP encoding. xchg %eax, %eax needs
+// to clear upper 32-bits of RAX so is not a NOP.
+def GR32_NOAX : RegisterClass<"X86", [i32], 32, (sub GR32, EAX)> {
+ let SubRegClasses = [(GR8 sub_8bit, sub_8bit_hi), (GR16 sub_16bit)];
+}
+
// GR32_NOSP - GR32 registers except ESP.
def GR32_NOSP : RegisterClass<"X86", [i32], 32, (sub GR32, ESP)> {
let SubRegClasses = [(GR8 sub_8bit, sub_8bit_hi), (GR16 sub_16bit)];
@@ -455,8 +462,8 @@ def VR128 : RegisterClass<"X86", [v16i8, v8i16, v4i32, v2i64, v4f32, v2f64],
let SubRegClasses = [(FR32 sub_ss), (FR64 sub_sd)];
}
-def VR256 : RegisterClass<"X86", [v32i8, v8i32, v4i64, v8f32, v4f64], 256,
- (sequence "YMM%u", 0, 15)> {
+def VR256 : RegisterClass<"X86", [v32i8, v16i16, v8i32, v4i64, v8f32, v4f64],
+ 256, (sequence "YMM%u", 0, 15)> {
let SubRegClasses = [(FR32 sub_ss), (FR64 sub_sd), (VR128 sub_xmm)];
}
diff --git a/contrib/llvm/lib/Target/X86/X86SelectionDAGInfo.cpp b/contrib/llvm/lib/Target/X86/X86SelectionDAGInfo.cpp
index 02754f9..6406bce 100644
--- a/contrib/llvm/lib/Target/X86/X86SelectionDAGInfo.cpp
+++ b/contrib/llvm/lib/Target/X86/X86SelectionDAGInfo.cpp
@@ -54,7 +54,7 @@ X86SelectionDAGInfo::EmitTargetCodeForMemset(SelectionDAG &DAG, DebugLoc dl,
if (const char *bzeroEntry = V &&
V->isNullValue() ? Subtarget->getBZeroEntry() : 0) {
EVT IntPtr = TLI.getPointerTy();
- const Type *IntPtrTy = getTargetData()->getIntPtrType(*DAG.getContext());
+ Type *IntPtrTy = getTargetData()->getIntPtrType(*DAG.getContext());
TargetLowering::ArgListTy Args;
TargetLowering::ArgListEntry Entry;
Entry.Node = Dst;
diff --git a/contrib/llvm/lib/Target/X86/X86Subtarget.cpp b/contrib/llvm/lib/Target/X86/X86Subtarget.cpp
index 5e6c659..7064dd0 100644
--- a/contrib/llvm/lib/Target/X86/X86Subtarget.cpp
+++ b/contrib/llvm/lib/Target/X86/X86Subtarget.cpp
@@ -16,9 +16,11 @@
#include "X86InstrInfo.h"
#include "llvm/GlobalValue.h"
#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Support/Host.h"
#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetOptions.h"
#include "llvm/ADT/SmallVector.h"
#define GET_SUBTARGETINFO_TARGET_DESC
@@ -185,24 +187,53 @@ void X86Subtarget::AutoDetectSubtargetFeatures() {
X86_MC::GetCpuIDAndInfo(0x1, &EAX, &EBX, &ECX, &EDX);
- if ((EDX >> 15) & 1) HasCMov = true; ToggleFeature(X86::FeatureCMOV);
- if ((EDX >> 23) & 1) X86SSELevel = MMX; ToggleFeature(X86::FeatureMMX);
- if ((EDX >> 25) & 1) X86SSELevel = SSE1; ToggleFeature(X86::FeatureSSE1);
- if ((EDX >> 26) & 1) X86SSELevel = SSE2; ToggleFeature(X86::FeatureSSE2);
- if (ECX & 0x1) X86SSELevel = SSE3; ToggleFeature(X86::FeatureSSE3);
- if ((ECX >> 9) & 1) X86SSELevel = SSSE3; ToggleFeature(X86::FeatureSSSE3);
- if ((ECX >> 19) & 1) X86SSELevel = SSE41; ToggleFeature(X86::FeatureSSE41);
- if ((ECX >> 20) & 1) X86SSELevel = SSE42; ToggleFeature(X86::FeatureSSE42);
+ if ((EDX >> 15) & 1) { HasCMov = true; ToggleFeature(X86::FeatureCMOV); }
+ if ((EDX >> 23) & 1) { X86SSELevel = MMX; ToggleFeature(X86::FeatureMMX); }
+ if ((EDX >> 25) & 1) { X86SSELevel = SSE1; ToggleFeature(X86::FeatureSSE1); }
+ if ((EDX >> 26) & 1) { X86SSELevel = SSE2; ToggleFeature(X86::FeatureSSE2); }
+ if (ECX & 0x1) { X86SSELevel = SSE3; ToggleFeature(X86::FeatureSSE3); }
+ if ((ECX >> 9) & 1) { X86SSELevel = SSSE3; ToggleFeature(X86::FeatureSSSE3);}
+ if ((ECX >> 19) & 1) { X86SSELevel = SSE41; ToggleFeature(X86::FeatureSSE41);}
+ if ((ECX >> 20) & 1) { X86SSELevel = SSE42; ToggleFeature(X86::FeatureSSE42);}
// FIXME: AVX codegen support is not ready.
- //if ((ECX >> 28) & 1) { HasAVX = true; } ToggleFeature(X86::FeatureAVX);
+ //if ((ECX >> 28) & 1) { HasAVX = true; ToggleFeature(X86::FeatureAVX); }
bool IsIntel = memcmp(text.c, "GenuineIntel", 12) == 0;
bool IsAMD = !IsIntel && memcmp(text.c, "AuthenticAMD", 12) == 0;
- HasCLMUL = IsIntel && ((ECX >> 1) & 0x1); ToggleFeature(X86::FeatureCLMUL);
- HasFMA3 = IsIntel && ((ECX >> 12) & 0x1); ToggleFeature(X86::FeatureFMA3);
- HasPOPCNT = IsIntel && ((ECX >> 23) & 0x1); ToggleFeature(X86::FeaturePOPCNT);
- HasAES = IsIntel && ((ECX >> 25) & 0x1); ToggleFeature(X86::FeatureAES);
+ if (IsIntel && ((ECX >> 1) & 0x1)) {
+ HasCLMUL = true;
+ ToggleFeature(X86::FeatureCLMUL);
+ }
+ if (IsIntel && ((ECX >> 12) & 0x1)) {
+ HasFMA3 = true;
+ ToggleFeature(X86::FeatureFMA3);
+ }
+ if (IsIntel && ((ECX >> 22) & 0x1)) {
+ HasMOVBE = true;
+ ToggleFeature(X86::FeatureMOVBE);
+ }
+ if (IsIntel && ((ECX >> 23) & 0x1)) {
+ HasPOPCNT = true;
+ ToggleFeature(X86::FeaturePOPCNT);
+ }
+ if (IsIntel && ((ECX >> 25) & 0x1)) {
+ HasAES = true;
+ ToggleFeature(X86::FeatureAES);
+ }
+ if (IsIntel && ((ECX >> 29) & 0x1)) {
+ HasF16C = true;
+ ToggleFeature(X86::FeatureF16C);
+ }
+ if (IsIntel && ((ECX >> 30) & 0x1)) {
+ HasRDRAND = true;
+ ToggleFeature(X86::FeatureRDRAND);
+ }
+
+ if ((ECX >> 13) & 0x1) {
+ HasCmpxchg16b = true;
+ ToggleFeature(X86::FeatureCMPXCHG16B);
+ }
if (IsIntel || IsAMD) {
// Determine if bit test memory instructions are slow.
@@ -224,6 +255,10 @@ void X86Subtarget::AutoDetectSubtargetFeatures() {
HasX86_64 = true;
ToggleFeature(X86::Feature64Bit);
}
+ if ((ECX >> 5) & 0x1) {
+ HasLZCNT = true;
+ ToggleFeature(X86::FeatureLZCNT);
+ }
if (IsAMD && ((ECX >> 6) & 0x1)) {
HasSSE4A = true;
ToggleFeature(X86::FeatureSSE4A);
@@ -251,14 +286,21 @@ X86Subtarget::X86Subtarget(const std::string &TT, const std::string &CPU,
, HasCLMUL(false)
, HasFMA3(false)
, HasFMA4(false)
+ , HasMOVBE(false)
+ , HasRDRAND(false)
+ , HasF16C(false)
+ , HasLZCNT(false)
+ , HasBMI(false)
, IsBTMemSlow(false)
, IsUAMemFast(false)
, HasVectorUAMem(false)
+ , HasCmpxchg16b(false)
, stackAlignment(8)
// FIXME: this is a known good value for Yonah. How about others?
, MaxInlineSizeThreshold(128)
, TargetTriple(TT)
- , In64BitMode(is64Bit) {
+ , In64BitMode(is64Bit)
+ , InNaClMode(false) {
// Determine default and user specified characteristics
if (!FS.empty() || !CPU.empty()) {
std::string CPUName = CPU;
@@ -304,6 +346,11 @@ X86Subtarget::X86Subtarget(const std::string &TT, const std::string &CPU,
if (In64BitMode)
ToggleFeature(X86::Mode64Bit);
+ if (isTargetNaCl()) {
+ InNaClMode = true;
+ ToggleFeature(X86::ModeNaCl);
+ }
+
if (HasAVX)
X86SSELevel = NoMMXSSE;
@@ -313,6 +360,9 @@ X86Subtarget::X86Subtarget(const std::string &TT, const std::string &CPU,
assert((!In64BitMode || HasX86_64) &&
"64-bit code requested on a subtarget that doesn't support it!");
+ if(EnableSegmentedStacks && !isTargetELF())
+ report_fatal_error("Segmented stacks are only implemented on ELF.");
+
// Stack alignment is 16 bytes on Darwin, FreeBSD, Linux and Solaris (both
// 32 and 64 bit) and for all 64-bit targets.
if (StackAlignOverride)
diff --git a/contrib/llvm/lib/Target/X86/X86Subtarget.h b/contrib/llvm/lib/Target/X86/X86Subtarget.h
index 6d22027..3258d3d 100644
--- a/contrib/llvm/lib/Target/X86/X86Subtarget.h
+++ b/contrib/llvm/lib/Target/X86/X86Subtarget.h
@@ -90,6 +90,21 @@ protected:
/// HasFMA4 - Target has 4-operand fused multiply-add
bool HasFMA4;
+ /// HasMOVBE - True if the processor has the MOVBE instruction.
+ bool HasMOVBE;
+
+ /// HasRDRAND - True if the processor has the RDRAND instruction.
+ bool HasRDRAND;
+
+ /// HasF16C - Processor has 16-bit floating point conversion instructions.
+ bool HasF16C;
+
+ /// HasLZCNT - Processor has LZCNT instruction.
+ bool HasLZCNT;
+
+ /// HasBMI - Processor has BMI1 instructions.
+ bool HasBMI;
+
/// IsBTMemSlow - True if BT (bit test) of memory instructions are slow.
bool IsBTMemSlow;
@@ -100,6 +115,10 @@ protected:
/// operands. This may require setting a feature bit in the processor.
bool HasVectorUAMem;
+ /// HasCmpxchg16b - True if this processor has the CMPXCHG16B instruction;
+ /// this is true for most x86-64 chips, but not the first AMD chips.
+ bool HasCmpxchg16b;
+
/// stackAlignment - The minimum alignment known to hold of the stack frame on
/// entry to the function and which must be maintained by every function.
unsigned stackAlignment;
@@ -115,6 +134,9 @@ private:
/// In64BitMode - True if compiling for 64-bit, false for 32-bit.
bool In64BitMode;
+ /// InNaClMode - True if compiling for Native Client target.
+ bool InNaClMode;
+
public:
/// This constructor initializes the data members to match that
@@ -165,9 +187,15 @@ public:
bool hasCLMUL() const { return HasCLMUL; }
bool hasFMA3() const { return HasFMA3; }
bool hasFMA4() const { return HasFMA4; }
+ bool hasMOVBE() const { return HasMOVBE; }
+ bool hasRDRAND() const { return HasRDRAND; }
+ bool hasF16C() const { return HasF16C; }
+ bool hasLZCNT() const { return HasLZCNT; }
+ bool hasBMI() const { return HasBMI; }
bool isBTMemSlow() const { return IsBTMemSlow; }
bool isUnalignedMemAccessFast() const { return IsUAMemFast; }
bool hasVectorUAMem() const { return HasVectorUAMem; }
+ bool hasCmpxchg16b() const { return HasCmpxchg16b; }
const Triple &getTargetTriple() const { return TargetTriple; }
@@ -185,6 +213,11 @@ public:
return !isTargetDarwin() && !isTargetWindows() && !isTargetCygMing();
}
bool isTargetLinux() const { return TargetTriple.getOS() == Triple::Linux; }
+ bool isTargetNaCl() const {
+ return TargetTriple.getOS() == Triple::NativeClient;
+ }
+ bool isTargetNaCl32() const { return isTargetNaCl() && !is64Bit(); }
+ bool isTargetNaCl64() const { return isTargetNaCl() && is64Bit(); }
bool isTargetWindows() const { return TargetTriple.getOS() == Triple::Win32; }
bool isTargetMingw() const { return TargetTriple.getOS() == Triple::MinGW32; }
@@ -199,7 +232,8 @@ public:
}
bool isTargetWin64() const {
- return In64BitMode && (isTargetMingw() || isTargetWindows());
+ // FIXME: x86_64-cygwin has not been released yet.
+ return In64BitMode && (isTargetCygMing() || isTargetWindows());
}
bool isTargetEnvMacho() const {
diff --git a/contrib/llvm/lib/Target/X86/X86TargetMachine.cpp b/contrib/llvm/lib/Target/X86/X86TargetMachine.cpp
index 9cab0e0..15c6c4e 100644
--- a/contrib/llvm/lib/Target/X86/X86TargetMachine.cpp
+++ b/contrib/llvm/lib/Target/X86/X86TargetMachine.cpp
@@ -16,65 +16,32 @@
#include "llvm/PassManager.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/Passes.h"
-#include "llvm/MC/MCCodeEmitter.h"
-#include "llvm/MC/MCStreamer.h"
+#include "llvm/Support/CommandLine.h"
#include "llvm/Support/FormattedStream.h"
#include "llvm/Target/TargetOptions.h"
-#include "llvm/Target/TargetRegistry.h"
+#include "llvm/Support/TargetRegistry.h"
using namespace llvm;
-static MCStreamer *createMCStreamer(const Target &T, const std::string &TT,
- MCContext &Ctx, TargetAsmBackend &TAB,
- raw_ostream &_OS,
- MCCodeEmitter *_Emitter,
- bool RelaxAll,
- bool NoExecStack) {
- Triple TheTriple(TT);
-
- if (TheTriple.isOSDarwin() || TheTriple.getEnvironment() == Triple::MachO)
- return createMachOStreamer(Ctx, TAB, _OS, _Emitter, RelaxAll);
-
- if (TheTriple.isOSWindows())
- return createWinCOFFStreamer(Ctx, TAB, *_Emitter, _OS, RelaxAll);
-
- return createELFStreamer(Ctx, TAB, _OS, _Emitter, RelaxAll, NoExecStack);
-}
-
extern "C" void LLVMInitializeX86Target() {
// Register the target.
RegisterTargetMachine<X86_32TargetMachine> X(TheX86_32Target);
RegisterTargetMachine<X86_64TargetMachine> Y(TheX86_64Target);
-
- // Register the code emitter.
- TargetRegistry::RegisterCodeEmitter(TheX86_32Target,
- createX86MCCodeEmitter);
- TargetRegistry::RegisterCodeEmitter(TheX86_64Target,
- createX86MCCodeEmitter);
-
- // Register the asm backend.
- TargetRegistry::RegisterAsmBackend(TheX86_32Target,
- createX86_32AsmBackend);
- TargetRegistry::RegisterAsmBackend(TheX86_64Target,
- createX86_64AsmBackend);
-
- // Register the object streamer.
- TargetRegistry::RegisterObjectStreamer(TheX86_32Target,
- createMCStreamer);
- TargetRegistry::RegisterObjectStreamer(TheX86_64Target,
- createMCStreamer);
}
-X86_32TargetMachine::X86_32TargetMachine(const Target &T, const std::string &TT,
- const std::string &CPU,
- const std::string &FS)
- : X86TargetMachine(T, TT, CPU, FS, false),
+X86_32TargetMachine::X86_32TargetMachine(const Target &T, StringRef TT,
+ StringRef CPU, StringRef FS,
+ Reloc::Model RM, CodeModel::Model CM)
+ : X86TargetMachine(T, TT, CPU, FS, RM, CM, false),
DataLayout(getSubtargetImpl()->isTargetDarwin() ?
- "e-p:32:32-f64:32:64-i64:32:64-f80:128:128-f128:128:128-n8:16:32" :
+ "e-p:32:32-f64:32:64-i64:32:64-f80:128:128-f128:128:128-"
+ "n8:16:32-S128" :
(getSubtargetImpl()->isTargetCygMing() ||
getSubtargetImpl()->isTargetWindows()) ?
- "e-p:32:32-f64:64:64-i64:64:64-f80:32:32-f128:128:128-n8:16:32" :
- "e-p:32:32-f64:32:64-i64:32:64-f80:32:32-f128:128:128-n8:16:32"),
+ "e-p:32:32-f64:64:64-i64:64:64-f80:32:32-f128:128:128-"
+ "n8:16:32-S32" :
+ "e-p:32:32-f64:32:64-i64:32:64-f80:32:32-f128:128:128-"
+ "n8:16:32-S128"),
InstrInfo(*this),
TSInfo(*this),
TLInfo(*this),
@@ -82,11 +49,12 @@ X86_32TargetMachine::X86_32TargetMachine(const Target &T, const std::string &TT,
}
-X86_64TargetMachine::X86_64TargetMachine(const Target &T, const std::string &TT,
- const std::string &CPU,
- const std::string &FS)
- : X86TargetMachine(T, TT, CPU, FS, true),
- DataLayout("e-p:64:64-s:64-f64:64:64-i64:64:64-f80:128:128-f128:128:128-n8:16:32:64"),
+X86_64TargetMachine::X86_64TargetMachine(const Target &T, StringRef TT,
+ StringRef CPU, StringRef FS,
+ Reloc::Model RM, CodeModel::Model CM)
+ : X86TargetMachine(T, TT, CPU, FS, RM, CM, true),
+ DataLayout("e-p:64:64-s:64-f64:64:64-i64:64:64-f80:128:128-f128:128:128-"
+ "n8:16:32:64-S128"),
InstrInfo(*this),
TSInfo(*this),
TLInfo(*this),
@@ -95,52 +63,14 @@ X86_64TargetMachine::X86_64TargetMachine(const Target &T, const std::string &TT,
/// X86TargetMachine ctor - Create an X86 target.
///
-X86TargetMachine::X86TargetMachine(const Target &T, const std::string &TT,
- const std::string &CPU,
- const std::string &FS, bool is64Bit)
- : LLVMTargetMachine(T, TT, CPU, FS),
+X86TargetMachine::X86TargetMachine(const Target &T, StringRef TT,
+ StringRef CPU, StringRef FS,
+ Reloc::Model RM, CodeModel::Model CM,
+ bool is64Bit)
+ : LLVMTargetMachine(T, TT, CPU, FS, RM, CM),
Subtarget(TT, CPU, FS, StackAlignmentOverride, is64Bit),
FrameLowering(*this, Subtarget),
ELFWriterInfo(is64Bit, true) {
- DefRelocModel = getRelocationModel();
-
- // If no relocation model was picked, default as appropriate for the target.
- if (getRelocationModel() == Reloc::Default) {
- // Darwin defaults to PIC in 64 bit mode and dynamic-no-pic in 32 bit mode.
- // Win64 requires rip-rel addressing, thus we force it to PIC. Otherwise we
- // use static relocation model by default.
- if (Subtarget.isTargetDarwin()) {
- if (Subtarget.is64Bit())
- setRelocationModel(Reloc::PIC_);
- else
- setRelocationModel(Reloc::DynamicNoPIC);
- } else if (Subtarget.isTargetWin64())
- setRelocationModel(Reloc::PIC_);
- else
- setRelocationModel(Reloc::Static);
- }
-
- assert(getRelocationModel() != Reloc::Default &&
- "Relocation mode not picked");
-
- // ELF and X86-64 don't have a distinct DynamicNoPIC model. DynamicNoPIC
- // is defined as a model for code which may be used in static or dynamic
- // executables but not necessarily a shared library. On X86-32 we just
- // compile in -static mode, in x86-64 we use PIC.
- if (getRelocationModel() == Reloc::DynamicNoPIC) {
- if (is64Bit)
- setRelocationModel(Reloc::PIC_);
- else if (!Subtarget.isTargetDarwin())
- setRelocationModel(Reloc::Static);
- }
-
- // If we are on Darwin, disallow static relocation model in X86-64 mode, since
- // the Mach-O file format doesn't support it.
- if (getRelocationModel() == Reloc::Static &&
- Subtarget.isTargetDarwin() &&
- is64Bit)
- setRelocationModel(Reloc::PIC_);
-
// Determine the PICStyle based on the target selected.
if (getRelocationModel() == Reloc::Static) {
// Unless we're in PIC or DynamicNoPIC mode, set the PIC style to None.
@@ -161,16 +91,20 @@ X86TargetMachine::X86TargetMachine(const Target &T, const std::string &TT,
Subtarget.setPICStyle(PICStyles::GOT);
}
- // Finally, if we have "none" as our PIC style, force to static mode.
- if (Subtarget.getPICStyle() == PICStyles::None)
- setRelocationModel(Reloc::Static);
-
// default to hard float ABI
if (FloatABIType == FloatABI::Default)
FloatABIType = FloatABI::Hard;
}
//===----------------------------------------------------------------------===//
+// Command line options for x86
+//===----------------------------------------------------------------------===//
+static cl::opt<bool>
+UseVZeroUpper("x86-use-vzeroupper",
+ cl::desc("Minimize AVX to SSE transition penalty"),
+ cl::init(false));
+
+//===----------------------------------------------------------------------===//
// Pass Pipeline Configuration
//===----------------------------------------------------------------------===//
@@ -200,46 +134,25 @@ bool X86TargetMachine::addPostRegAlloc(PassManagerBase &PM,
bool X86TargetMachine::addPreEmitPass(PassManagerBase &PM,
CodeGenOpt::Level OptLevel) {
- if (OptLevel != CodeGenOpt::None && Subtarget.hasSSE2()) {
- PM.add(createSSEDomainFixPass());
- return true;
+ bool ShouldPrint = false;
+ if (OptLevel != CodeGenOpt::None &&
+ (Subtarget.hasSSE2() || Subtarget.hasAVX())) {
+ PM.add(createExecutionDependencyFixPass(&X86::VR128RegClass));
+ ShouldPrint = true;
}
- return false;
+
+ if (Subtarget.hasAVX() && UseVZeroUpper) {
+ PM.add(createX86IssueVZeroUpperPass());
+ ShouldPrint = true;
+ }
+
+ return ShouldPrint;
}
bool X86TargetMachine::addCodeEmitter(PassManagerBase &PM,
CodeGenOpt::Level OptLevel,
JITCodeEmitter &JCE) {
- // FIXME: Move this to TargetJITInfo!
- // On Darwin, do not override 64-bit setting made in X86TargetMachine().
- if (DefRelocModel == Reloc::Default &&
- (!Subtarget.isTargetDarwin() || !Subtarget.is64Bit())) {
- setRelocationModel(Reloc::Static);
- Subtarget.setPICStyle(PICStyles::None);
- }
-
-
PM.add(createX86JITCodeEmitterPass(*this, JCE));
return false;
}
-
-void X86TargetMachine::setCodeModelForStatic() {
-
- if (getCodeModel() != CodeModel::Default) return;
-
- // For static codegen, if we're not already set, use Small codegen.
- setCodeModel(CodeModel::Small);
-}
-
-
-void X86TargetMachine::setCodeModelForJIT() {
-
- if (getCodeModel() != CodeModel::Default) return;
-
- // 64-bit JIT places everything in the same buffer except external functions.
- if (Subtarget.is64Bit())
- setCodeModel(CodeModel::Large);
- else
- setCodeModel(CodeModel::Small);
-}
diff --git a/contrib/llvm/lib/Target/X86/X86TargetMachine.h b/contrib/llvm/lib/Target/X86/X86TargetMachine.h
index 885334a..d1569aa 100644
--- a/contrib/llvm/lib/Target/X86/X86TargetMachine.h
+++ b/contrib/llvm/lib/Target/X86/X86TargetMachine.h
@@ -29,21 +29,17 @@
namespace llvm {
class formatted_raw_ostream;
+class StringRef;
class X86TargetMachine : public LLVMTargetMachine {
X86Subtarget Subtarget;
X86FrameLowering FrameLowering;
X86ELFWriterInfo ELFWriterInfo;
- Reloc::Model DefRelocModel; // Reloc model before it's overridden.
-private:
- // We have specific defaults for X86.
- virtual void setCodeModelForJIT();
- virtual void setCodeModelForStatic();
-
public:
- X86TargetMachine(const Target &T, const std::string &TT,
- const std::string &CPU, const std::string &FS,
+ X86TargetMachine(const Target &T, StringRef TT,
+ StringRef CPU, StringRef FS,
+ Reloc::Model RM, CodeModel::Model CM,
bool is64Bit);
virtual const X86InstrInfo *getInstrInfo() const {
@@ -87,8 +83,9 @@ class X86_32TargetMachine : public X86TargetMachine {
X86TargetLowering TLInfo;
X86JITInfo JITInfo;
public:
- X86_32TargetMachine(const Target &T, const std::string &M,
- const std::string &CPU, const std::string &FS);
+ X86_32TargetMachine(const Target &T, StringRef TT,
+ StringRef CPU, StringRef FS,
+ Reloc::Model RM, CodeModel::Model CM);
virtual const TargetData *getTargetData() const { return &DataLayout; }
virtual const X86TargetLowering *getTargetLowering() const {
return &TLInfo;
@@ -113,8 +110,9 @@ class X86_64TargetMachine : public X86TargetMachine {
X86TargetLowering TLInfo;
X86JITInfo JITInfo;
public:
- X86_64TargetMachine(const Target &T, const std::string &TT,
- const std::string &CPU, const std::string &FS);
+ X86_64TargetMachine(const Target &T, StringRef TT,
+ StringRef CPU, StringRef FS,
+ Reloc::Model RM, CodeModel::Model CM);
virtual const TargetData *getTargetData() const { return &DataLayout; }
virtual const X86TargetLowering *getTargetLowering() const {
return &TLInfo;
diff --git a/contrib/llvm/lib/Target/X86/X86TargetObjectFile.cpp b/contrib/llvm/lib/Target/X86/X86TargetObjectFile.cpp
index 1231798..991f322 100644
--- a/contrib/llvm/lib/Target/X86/X86TargetObjectFile.cpp
+++ b/contrib/llvm/lib/Target/X86/X86TargetObjectFile.cpp
@@ -43,79 +43,3 @@ getCFIPersonalitySymbol(const GlobalValue *GV, Mangler *Mang,
MachineModuleInfo *MMI) const {
return Mang->getSymbol(GV);
}
-
-unsigned X8632_ELFTargetObjectFile::getPersonalityEncoding() const {
- if (TM.getRelocationModel() == Reloc::PIC_)
- return DW_EH_PE_indirect | DW_EH_PE_pcrel | DW_EH_PE_sdata4;
- else
- return DW_EH_PE_absptr;
-}
-
-unsigned X8632_ELFTargetObjectFile::getLSDAEncoding() const {
- if (TM.getRelocationModel() == Reloc::PIC_)
- return DW_EH_PE_pcrel | DW_EH_PE_sdata4;
- else
- return DW_EH_PE_absptr;
-}
-
-unsigned X8632_ELFTargetObjectFile::getFDEEncoding(bool FDE) const {
- if (TM.getRelocationModel() == Reloc::PIC_)
- return DW_EH_PE_pcrel | DW_EH_PE_sdata4;
- else
- return DW_EH_PE_absptr;
-}
-
-unsigned X8632_ELFTargetObjectFile::getTTypeEncoding() const {
- if (TM.getRelocationModel() == Reloc::PIC_)
- return DW_EH_PE_indirect | DW_EH_PE_pcrel | DW_EH_PE_sdata4;
- else
- return DW_EH_PE_absptr;
-}
-
-unsigned X8664_ELFTargetObjectFile::getPersonalityEncoding() const {
- CodeModel::Model Model = TM.getCodeModel();
- if (TM.getRelocationModel() == Reloc::PIC_)
- return DW_EH_PE_indirect | DW_EH_PE_pcrel | (Model == CodeModel::Small ||
- Model == CodeModel::Medium ?
- DW_EH_PE_sdata4 : DW_EH_PE_sdata8);
-
- if (Model == CodeModel::Small || Model == CodeModel::Medium)
- return DW_EH_PE_udata4;
-
- return DW_EH_PE_absptr;
-}
-
-unsigned X8664_ELFTargetObjectFile::getLSDAEncoding() const {
- CodeModel::Model Model = TM.getCodeModel();
- if (TM.getRelocationModel() == Reloc::PIC_)
- return DW_EH_PE_pcrel | (Model == CodeModel::Small ?
- DW_EH_PE_sdata4 : DW_EH_PE_sdata8);
-
- if (Model == CodeModel::Small)
- return DW_EH_PE_udata4;
-
- return DW_EH_PE_absptr;
-}
-
-unsigned X8664_ELFTargetObjectFile::getFDEEncoding(bool CFI) const {
- if (CFI)
- return DW_EH_PE_pcrel | DW_EH_PE_sdata4;
-
- if (TM.getRelocationModel() == Reloc::PIC_)
- return DW_EH_PE_pcrel | DW_EH_PE_sdata4;
-
- return DW_EH_PE_udata4;
-}
-
-unsigned X8664_ELFTargetObjectFile::getTTypeEncoding() const {
- CodeModel::Model Model = TM.getCodeModel();
- if (TM.getRelocationModel() == Reloc::PIC_)
- return DW_EH_PE_indirect | DW_EH_PE_pcrel | (Model == CodeModel::Small ||
- Model == CodeModel::Medium ?
- DW_EH_PE_sdata4 : DW_EH_PE_sdata8);
-
- if (Model == CodeModel::Small)
- return DW_EH_PE_udata4;
-
- return DW_EH_PE_absptr;
-}
diff --git a/contrib/llvm/lib/Target/X86/X86TargetObjectFile.h b/contrib/llvm/lib/Target/X86/X86TargetObjectFile.h
index e21b5bf..d7adf27 100644
--- a/contrib/llvm/lib/Target/X86/X86TargetObjectFile.h
+++ b/contrib/llvm/lib/Target/X86/X86TargetObjectFile.h
@@ -33,28 +33,6 @@ namespace llvm {
MachineModuleInfo *MMI) const;
};
- class X8632_ELFTargetObjectFile : public TargetLoweringObjectFileELF {
- const X86TargetMachine &TM;
- public:
- X8632_ELFTargetObjectFile(const X86TargetMachine &tm)
- :TM(tm) { }
- virtual unsigned getPersonalityEncoding() const;
- virtual unsigned getLSDAEncoding() const;
- virtual unsigned getFDEEncoding(bool CFI) const;
- virtual unsigned getTTypeEncoding() const;
- };
-
- class X8664_ELFTargetObjectFile : public TargetLoweringObjectFileELF {
- const X86TargetMachine &TM;
- public:
- X8664_ELFTargetObjectFile(const X86TargetMachine &tm)
- :TM(tm) { }
- virtual unsigned getPersonalityEncoding() const;
- virtual unsigned getLSDAEncoding() const;
- virtual unsigned getFDEEncoding(bool CFI) const;
- virtual unsigned getTTypeEncoding() const;
- };
-
} // end namespace llvm
#endif
diff --git a/contrib/llvm/lib/Target/X86/X86VZeroUpper.cpp b/contrib/llvm/lib/Target/X86/X86VZeroUpper.cpp
new file mode 100644
index 0000000..3958494
--- /dev/null
+++ b/contrib/llvm/lib/Target/X86/X86VZeroUpper.cpp
@@ -0,0 +1,105 @@
+//===-- X86VZeroUpper.cpp - AVX vzeroupper instruction inserter -----------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the pass which inserts x86 AVX vzeroupper instructions
+// before calls to SSE encoded functions. This avoids transition latency
+// penalty when tranfering control between AVX encoded instructions and old
+// SSE encoding mode.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "x86-codegen"
+#include "X86.h"
+#include "X86InstrInfo.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/GlobalValue.h"
+#include "llvm/Target/TargetInstrInfo.h"
+using namespace llvm;
+
+STATISTIC(NumVZU, "Number of vzeroupper instructions inserted");
+
+namespace {
+ struct VZeroUpperInserter : public MachineFunctionPass {
+ static char ID;
+ VZeroUpperInserter() : MachineFunctionPass(ID) {}
+
+ virtual bool runOnMachineFunction(MachineFunction &MF);
+
+ bool processBasicBlock(MachineFunction &MF, MachineBasicBlock &MBB);
+
+ virtual const char *getPassName() const { return "X86 vzeroupper inserter";}
+
+ private:
+ const TargetInstrInfo *TII; // Machine instruction info.
+ MachineBasicBlock *MBB; // Current basic block
+ };
+ char VZeroUpperInserter::ID = 0;
+}
+
+FunctionPass *llvm::createX86IssueVZeroUpperPass() {
+ return new VZeroUpperInserter();
+}
+
+/// runOnMachineFunction - Loop over all of the basic blocks, inserting
+/// vzero upper instructions before function calls.
+bool VZeroUpperInserter::runOnMachineFunction(MachineFunction &MF) {
+ TII = MF.getTarget().getInstrInfo();
+ bool Changed = false;
+
+ // Process any unreachable blocks in arbitrary order now.
+ for (MachineFunction::iterator BB = MF.begin(), E = MF.end(); BB != E; ++BB)
+ Changed |= processBasicBlock(MF, *BB);
+
+ return Changed;
+}
+
+static bool isCallToModuleFn(const MachineInstr *MI) {
+ assert(MI->getDesc().isCall() && "Isn't a call instruction");
+
+ for (int i = 0, e = MI->getNumOperands(); i != e; ++i) {
+ const MachineOperand &MO = MI->getOperand(i);
+
+ if (!MO.isGlobal())
+ continue;
+
+ const GlobalValue *GV = MO.getGlobal();
+ GlobalValue::LinkageTypes LT = GV->getLinkage();
+ if (GV->isInternalLinkage(LT) || GV->isPrivateLinkage(LT) ||
+ (GV->isExternalLinkage(LT) && !GV->isDeclaration()))
+ return true;
+
+ return false;
+ }
+ return false;
+}
+
+/// processBasicBlock - Loop over all of the instructions in the basic block,
+/// inserting vzero upper instructions before function calls.
+bool VZeroUpperInserter::processBasicBlock(MachineFunction &MF,
+ MachineBasicBlock &BB) {
+ bool Changed = false;
+ MBB = &BB;
+
+ for (MachineBasicBlock::iterator I = BB.begin(); I != BB.end(); ++I) {
+ MachineInstr *MI = I;
+ DebugLoc dl = I->getDebugLoc();
+
+ // Insert a vzeroupper instruction before each control transfer
+ // to functions outside this module
+ if (MI->getDesc().isCall() && !isCallToModuleFn(MI)) {
+ BuildMI(*MBB, I, dl, TII->get(X86::VZEROUPPER));
+ ++NumVZU;
+ }
+ }
+
+ return Changed;
+}
diff --git a/contrib/llvm/lib/Target/XCore/MCTargetDesc/CMakeLists.txt b/contrib/llvm/lib/Target/XCore/MCTargetDesc/CMakeLists.txt
deleted file mode 100644
index c3b3dc9..0000000
--- a/contrib/llvm/lib/Target/XCore/MCTargetDesc/CMakeLists.txt
+++ /dev/null
@@ -1,7 +0,0 @@
-add_llvm_library(LLVMXCoreDesc
- XCoreMCTargetDesc.cpp
- XCoreMCAsmInfo.cpp
- )
-
-# Hack: we need to include 'main' target directory to grab private headers
-include_directories(${CMAKE_CURRENT_SOURCE_DIR}/.. ${CMAKE_CURRENT_BINARY_DIR}/..)
diff --git a/contrib/llvm/lib/Target/XCore/MCTargetDesc/Makefile b/contrib/llvm/lib/Target/XCore/MCTargetDesc/Makefile
deleted file mode 100644
index de61543..0000000
--- a/contrib/llvm/lib/Target/XCore/MCTargetDesc/Makefile
+++ /dev/null
@@ -1,16 +0,0 @@
-##===- lib/Target/XCore/TargetDesc/Makefile ----------------*- Makefile -*-===##
-#
-# The LLVM Compiler Infrastructure
-#
-# This file is distributed under the University of Illinois Open Source
-# License. See LICENSE.TXT for details.
-#
-##===----------------------------------------------------------------------===##
-
-LEVEL = ../../../..
-LIBRARYNAME = LLVMXCoreDesc
-
-# Hack: we need to include 'main' target directory to grab private headers
-CPP.Flags += -I$(PROJ_OBJ_DIR)/.. -I$(PROJ_SRC_DIR)/..
-
-include $(LEVEL)/Makefile.common
diff --git a/contrib/llvm/lib/Target/XCore/MCTargetDesc/XCoreMCTargetDesc.cpp b/contrib/llvm/lib/Target/XCore/MCTargetDesc/XCoreMCTargetDesc.cpp
index 939d97c..276e841 100644
--- a/contrib/llvm/lib/Target/XCore/MCTargetDesc/XCoreMCTargetDesc.cpp
+++ b/contrib/llvm/lib/Target/XCore/MCTargetDesc/XCoreMCTargetDesc.cpp
@@ -13,10 +13,11 @@
#include "XCoreMCTargetDesc.h"
#include "XCoreMCAsmInfo.h"
+#include "llvm/MC/MCCodeGenInfo.h"
#include "llvm/MC/MCInstrInfo.h"
#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/MC/MCSubtargetInfo.h"
-#include "llvm/Target/TargetRegistry.h"
+#include "llvm/Support/TargetRegistry.h"
#define GET_INSTRINFO_MC_DESC
#include "XCoreGenInstrInfo.inc"
@@ -35,8 +36,10 @@ static MCInstrInfo *createXCoreMCInstrInfo() {
return X;
}
-extern "C" void LLVMInitializeXCoreMCInstrInfo() {
- TargetRegistry::RegisterMCInstrInfo(TheXCoreTarget, createXCoreMCInstrInfo);
+static MCRegisterInfo *createXCoreMCRegisterInfo(StringRef TT) {
+ MCRegisterInfo *X = new MCRegisterInfo();
+ InitXCoreMCRegisterInfo(X, XCore::LR);
+ return X;
}
static MCSubtargetInfo *createXCoreMCSubtargetInfo(StringRef TT, StringRef CPU,
@@ -46,11 +49,40 @@ static MCSubtargetInfo *createXCoreMCSubtargetInfo(StringRef TT, StringRef CPU,
return X;
}
-extern "C" void LLVMInitializeXCoreMCSubtargetInfo() {
- TargetRegistry::RegisterMCSubtargetInfo(TheXCoreTarget,
- createXCoreMCSubtargetInfo);
+static MCAsmInfo *createXCoreMCAsmInfo(const Target &T, StringRef TT) {
+ MCAsmInfo *MAI = new XCoreMCAsmInfo(T, TT);
+
+ // Initial state of the frame pointer is SP.
+ MachineLocation Dst(MachineLocation::VirtualFP);
+ MachineLocation Src(XCore::SP, 0);
+ MAI->addInitialFrameState(0, Dst, Src);
+
+ return MAI;
+}
+
+static MCCodeGenInfo *createXCoreMCCodeGenInfo(StringRef TT, Reloc::Model RM,
+ CodeModel::Model CM) {
+ MCCodeGenInfo *X = new MCCodeGenInfo();
+ X->InitMCCodeGenInfo(RM, CM);
+ return X;
}
-extern "C" void LLVMInitializeXCoreMCAsmInfo() {
- RegisterMCAsmInfo<XCoreMCAsmInfo> X(TheXCoreTarget);
+// Force static initialization.
+extern "C" void LLVMInitializeXCoreTargetMC() {
+ // Register the MC asm info.
+ RegisterMCAsmInfoFn X(TheXCoreTarget, createXCoreMCAsmInfo);
+
+ // Register the MC codegen info.
+ TargetRegistry::RegisterMCCodeGenInfo(TheXCoreTarget,
+ createXCoreMCCodeGenInfo);
+
+ // Register the MC instruction info.
+ TargetRegistry::RegisterMCInstrInfo(TheXCoreTarget, createXCoreMCInstrInfo);
+
+ // Register the MC register info.
+ TargetRegistry::RegisterMCRegInfo(TheXCoreTarget, createXCoreMCRegisterInfo);
+
+ // Register the MC subtarget info.
+ TargetRegistry::RegisterMCSubtargetInfo(TheXCoreTarget,
+ createXCoreMCSubtargetInfo);
}
diff --git a/contrib/llvm/lib/Target/XCore/TargetInfo/XCoreTargetInfo.cpp b/contrib/llvm/lib/Target/XCore/TargetInfo/XCoreTargetInfo.cpp
index 7aa8965..9a0971d 100644
--- a/contrib/llvm/lib/Target/XCore/TargetInfo/XCoreTargetInfo.cpp
+++ b/contrib/llvm/lib/Target/XCore/TargetInfo/XCoreTargetInfo.cpp
@@ -9,7 +9,7 @@
#include "XCore.h"
#include "llvm/Module.h"
-#include "llvm/Target/TargetRegistry.h"
+#include "llvm/Support/TargetRegistry.h"
using namespace llvm;
Target llvm::TheXCoreTarget;
diff --git a/contrib/llvm/lib/Target/XCore/XCoreAsmPrinter.cpp b/contrib/llvm/lib/Target/XCore/XCoreAsmPrinter.cpp
index 1a43714..8906b24 100644
--- a/contrib/llvm/lib/Target/XCore/XCoreAsmPrinter.cpp
+++ b/contrib/llvm/lib/Target/XCore/XCoreAsmPrinter.cpp
@@ -20,6 +20,7 @@
#include "llvm/Constants.h"
#include "llvm/DerivedTypes.h"
#include "llvm/Module.h"
+#include "llvm/Analysis/DebugInfo.h"
#include "llvm/CodeGen/AsmPrinter.h"
#include "llvm/CodeGen/MachineModuleInfo.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
@@ -32,11 +33,11 @@
#include "llvm/Target/Mangler.h"
#include "llvm/Target/TargetData.h"
#include "llvm/Target/TargetLoweringObjectFile.h"
-#include "llvm/Target/TargetRegistry.h"
#include "llvm/ADT/SmallString.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/TargetRegistry.h"
#include "llvm/Support/raw_ostream.h"
#include <algorithm>
#include <cctype>
@@ -51,6 +52,7 @@ static cl::opt<unsigned> MaxThreads("xcore-max-threads", cl::Optional,
namespace {
class XCoreAsmPrinter : public AsmPrinter {
const XCoreSubtarget &Subtarget;
+ void PrintDebugValueComment(const MachineInstr *MI, raw_ostream &OS);
public:
explicit XCoreAsmPrinter(TargetMachine &TM, MCStreamer &Streamer)
: AsmPrinter(TM, Streamer), Subtarget(TM.getSubtarget<XCoreSubtarget>()){}
@@ -79,6 +81,7 @@ namespace {
void EmitFunctionEntryLabel();
void EmitInstruction(const MachineInstr *MI);
void EmitFunctionBodyEnd();
+ virtual MachineLocation getDebugValueLocation(const MachineInstr *MI) const;
};
} // end of anonymous namespace
@@ -88,7 +91,7 @@ void XCoreAsmPrinter::emitArrayBound(MCSymbol *Sym, const GlobalVariable *GV) {
assert(((GV->hasExternalLinkage() ||
GV->hasWeakLinkage()) ||
GV->hasLinkOnceLinkage()) && "Unexpected linkage");
- if (const ArrayType *ATy = dyn_cast<ArrayType>(
+ if (ArrayType *ATy = dyn_cast<ArrayType>(
cast<PointerType>(GV->getType())->getElementType())) {
OutStreamer.EmitSymbolAttribute(Sym, MCSA_Global);
// FIXME: MCStreamerize.
@@ -261,16 +264,57 @@ bool XCoreAsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNo,
return false;
}
+void XCoreAsmPrinter::PrintDebugValueComment(const MachineInstr *MI,
+ raw_ostream &OS) {
+ unsigned NOps = MI->getNumOperands();
+ assert(NOps == 4);
+ OS << '\t' << MAI->getCommentString() << "DEBUG_VALUE: ";
+ // cast away const; DIetc do not take const operands for some reason.
+ DIVariable V(const_cast<MDNode *>(MI->getOperand(NOps-1).getMetadata()));
+ OS << V.getName();
+ OS << " <- ";
+ // Frame address. Currently handles register +- offset only.
+ assert(MI->getOperand(0).isReg() && MI->getOperand(1).isImm());
+ OS << '['; printOperand(MI, 0, OS); OS << '+'; printOperand(MI, 1, OS);
+ OS << ']';
+ OS << "+";
+ printOperand(MI, NOps-2, OS);
+}
+
+MachineLocation XCoreAsmPrinter::
+getDebugValueLocation(const MachineInstr *MI) const {
+ // Handles frame addresses emitted in XCoreInstrInfo::emitFrameIndexDebugValue.
+ assert(MI->getNumOperands() == 4 && "Invalid no. of machine operands!");
+ assert(MI->getOperand(0).isReg() && MI->getOperand(1).isImm() &&
+ "Unexpected MachineOperand types");
+ return MachineLocation(MI->getOperand(0).getReg(),
+ MI->getOperand(1).getImm());
+}
+
void XCoreAsmPrinter::EmitInstruction(const MachineInstr *MI) {
SmallString<128> Str;
raw_svector_ostream O(Str);
- // Check for mov mnemonic
- if (MI->getOpcode() == XCore::ADD_2rus && !MI->getOperand(2).getImm())
- O << "\tmov " << getRegisterName(MI->getOperand(0).getReg()) << ", "
- << getRegisterName(MI->getOperand(1).getReg());
- else
- printInstruction(MI, O);
+ switch (MI->getOpcode()) {
+ case XCore::DBG_VALUE: {
+ if (isVerbose() && OutStreamer.hasRawTextSupport()) {
+ SmallString<128> TmpStr;
+ raw_svector_ostream OS(TmpStr);
+ PrintDebugValueComment(MI, OS);
+ OutStreamer.EmitRawText(StringRef(OS.str()));
+ }
+ return;
+ }
+ case XCore::ADD_2rus:
+ if (MI->getOperand(2).getImm() == 0) {
+ O << "\tmov " << getRegisterName(MI->getOperand(0).getReg()) << ", "
+ << getRegisterName(MI->getOperand(1).getReg());
+ OutStreamer.EmitRawText(O.str());
+ return;
+ }
+ break;
+ }
+ printInstruction(MI, O);
OutStreamer.EmitRawText(O.str());
}
diff --git a/contrib/llvm/lib/Target/XCore/XCoreFrameLowering.cpp b/contrib/llvm/lib/Target/XCore/XCoreFrameLowering.cpp
index 0578220..7f8b169 100644
--- a/contrib/llvm/lib/Target/XCore/XCoreFrameLowering.cpp
+++ b/contrib/llvm/lib/Target/XCore/XCoreFrameLowering.cpp
@@ -100,7 +100,8 @@ void XCoreFrameLowering::emitPrologue(MachineFunction &MF) const {
DebugLoc dl = MBBI != MBB.end() ? MBBI->getDebugLoc() : DebugLoc();
bool FP = hasFP(MF);
- bool Nested = MF.getFunction()->getAttributes().hasAttrSomewhere(Attribute::Nest);
+ bool Nested = MF.getFunction()->
+ getAttributes().hasAttrSomewhere(Attribute::Nest);
if (Nested) {
loadFromStack(MBB, MBBI, XCore::R11, 0, dl, TII);
@@ -270,14 +271,6 @@ void XCoreFrameLowering::emitEpilogue(MachineFunction &MF,
}
}
-void XCoreFrameLowering::getInitialFrameState(std::vector<MachineMove> &Moves)
- const {
- // Initial state of the frame pointer is SP.
- MachineLocation Dst(MachineLocation::VirtualFP);
- MachineLocation Src(XCore::SP, 0);
- Moves.push_back(MachineMove(0, Dst, Src));
-}
-
bool XCoreFrameLowering::spillCalleeSavedRegisters(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MI,
const std::vector<CalleeSavedInfo> &CSI,
diff --git a/contrib/llvm/lib/Target/XCore/XCoreFrameLowering.h b/contrib/llvm/lib/Target/XCore/XCoreFrameLowering.h
index 7da19f0..c591e93 100644
--- a/contrib/llvm/lib/Target/XCore/XCoreFrameLowering.h
+++ b/contrib/llvm/lib/Target/XCore/XCoreFrameLowering.h
@@ -42,8 +42,6 @@ namespace llvm {
bool hasFP(const MachineFunction &MF) const;
- void getInitialFrameState(std::vector<MachineMove> &Moves) const;
-
void processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
RegScavenger *RS = NULL) const;
diff --git a/contrib/llvm/lib/Target/XCore/XCoreISelDAGToDAG.cpp b/contrib/llvm/lib/Target/XCore/XCoreISelDAGToDAG.cpp
index a8dd847..4dac1ce 100644
--- a/contrib/llvm/lib/Target/XCore/XCoreISelDAGToDAG.cpp
+++ b/contrib/llvm/lib/Target/XCore/XCoreISelDAGToDAG.cpp
@@ -169,9 +169,14 @@ SDNode *XCoreDAGToDAGISel::Select(SDNode *N) {
CurDAG->getTargetConstantPool(ConstantInt::get(
Type::getInt32Ty(*CurDAG->getContext()), Val),
TLI.getPointerTy());
- return CurDAG->getMachineNode(XCore::LDWCP_lru6, dl, MVT::i32,
- MVT::Other, CPIdx,
- CurDAG->getEntryNode());
+ SDNode *node = CurDAG->getMachineNode(XCore::LDWCP_lru6, dl, MVT::i32,
+ MVT::Other, CPIdx,
+ CurDAG->getEntryNode());
+ MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1);
+ MemOp[0] = MF->getMachineMemOperand(
+ MachinePointerInfo::getConstantPool(), MachineMemOperand::MOLoad, 4, 4);
+ cast<MachineSDNode>(node)->setMemRefs(MemOp, MemOp + 1);
+ return node;
}
break;
}
diff --git a/contrib/llvm/lib/Target/XCore/XCoreISelLowering.cpp b/contrib/llvm/lib/Target/XCore/XCoreISelLowering.cpp
index 6d040e0..2afe0e3 100644
--- a/contrib/llvm/lib/Target/XCore/XCoreISelLowering.cpp
+++ b/contrib/llvm/lib/Target/XCore/XCoreISelLowering.cpp
@@ -81,6 +81,7 @@ XCoreTargetLowering::XCoreTargetLowering(XCoreTargetMachine &XTM)
// Use i32 for setcc operations results (slt, sgt, ...).
setBooleanContents(ZeroOrOneBooleanContent);
+ setBooleanVectorContents(ZeroOrOneBooleanContent); // FIXME: Is this correct?
// XCore does not have the NodeTypes below.
setOperationAction(ISD::BR_CC, MVT::Other, Expand);
@@ -147,7 +148,8 @@ XCoreTargetLowering::XCoreTargetLowering(XCoreTargetMachine &XTM)
setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32, Expand);
// TRAMPOLINE is custom lowered.
- setOperationAction(ISD::TRAMPOLINE, MVT::Other, Custom);
+ setOperationAction(ISD::INIT_TRAMPOLINE, MVT::Other, Custom);
+ setOperationAction(ISD::ADJUST_TRAMPOLINE, MVT::Other, Custom);
maxStoresPerMemset = maxStoresPerMemsetOptSize = 4;
maxStoresPerMemmove = maxStoresPerMemmoveOptSize
@@ -180,7 +182,8 @@ LowerOperation(SDValue Op, SelectionDAG &DAG) const {
case ISD::ADD:
case ISD::SUB: return ExpandADDSUB(Op.getNode(), DAG);
case ISD::FRAMEADDR: return LowerFRAMEADDR(Op, DAG);
- case ISD::TRAMPOLINE: return LowerTRAMPOLINE(Op, DAG);
+ case ISD::INIT_TRAMPOLINE: return LowerINIT_TRAMPOLINE(Op, DAG);
+ case ISD::ADJUST_TRAMPOLINE: return LowerADJUST_TRAMPOLINE(Op, DAG);
default:
llvm_unreachable("unimplemented operand");
return SDValue();
@@ -252,8 +255,8 @@ static inline SDValue BuildGetId(SelectionDAG &DAG, DebugLoc dl) {
DAG.getConstant(Intrinsic::xcore_getid, MVT::i32));
}
-static inline bool isZeroLengthArray(const Type *Ty) {
- const ArrayType *AT = dyn_cast_or_null<ArrayType>(Ty);
+static inline bool isZeroLengthArray(Type *Ty) {
+ ArrayType *AT = dyn_cast_or_null<ArrayType>(Ty);
return AT && (AT->getNumElements() == 0);
}
@@ -275,7 +278,7 @@ LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const
llvm_unreachable("Thread local object not a GlobalVariable?");
return SDValue();
}
- const Type *Ty = cast<PointerType>(GV->getType())->getElementType();
+ Type *Ty = cast<PointerType>(GV->getType())->getElementType();
if (!Ty->isSized() || isZeroLengthArray(Ty)) {
#ifndef NDEBUG
errs() << "Size of thread local object " << GVar->getName()
@@ -465,7 +468,7 @@ LowerLOAD(SDValue Op, SelectionDAG &DAG) const {
}
// Lower to a call to __misaligned_load(BasePtr).
- const Type *IntPtrTy = getTargetData()->getIntPtrType(*DAG.getContext());
+ Type *IntPtrTy = getTargetData()->getIntPtrType(*DAG.getContext());
TargetLowering::ArgListTy Args;
TargetLowering::ArgListEntry Entry;
@@ -524,7 +527,7 @@ LowerSTORE(SDValue Op, SelectionDAG &DAG) const
}
// Lower to a call to __misaligned_store(BasePtr, Value).
- const Type *IntPtrTy = getTargetData()->getIntPtrType(*DAG.getContext());
+ Type *IntPtrTy = getTargetData()->getIntPtrType(*DAG.getContext());
TargetLowering::ArgListTy Args;
TargetLowering::ArgListEntry Entry;
@@ -789,7 +792,12 @@ SDValue XCoreTargetLowering::LowerFRAMEADDR(SDValue Op,
}
SDValue XCoreTargetLowering::
-LowerTRAMPOLINE(SDValue Op, SelectionDAG &DAG) const {
+LowerADJUST_TRAMPOLINE(SDValue Op, SelectionDAG &DAG) const {
+ return Op.getOperand(0);
+}
+
+SDValue XCoreTargetLowering::
+LowerINIT_TRAMPOLINE(SDValue Op, SelectionDAG &DAG) const {
SDValue Chain = Op.getOperand(0);
SDValue Trmp = Op.getOperand(1); // trampoline
SDValue FPtr = Op.getOperand(2); // nested function
@@ -841,9 +849,7 @@ LowerTRAMPOLINE(SDValue Op, SelectionDAG &DAG) const {
MachinePointerInfo(TrmpAddr, 16), false, false,
0);
- SDValue Ops[] =
- { Trmp, DAG.getNode(ISD::TokenFactor, dl, MVT::Other, OutChains, 5) };
- return DAG.getMergeValues(Ops, 2, dl);
+ return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, OutChains, 5);
}
//===----------------------------------------------------------------------===//
@@ -1148,10 +1154,10 @@ XCoreTargetLowering::LowerCCCArguments(SDValue Chain,
int offset = 0;
// Save remaining registers, storing higher register numbers at a higher
// address
- for (unsigned i = array_lengthof(ArgRegs) - 1; i >= FirstVAReg; --i) {
+ for (int i = array_lengthof(ArgRegs) - 1; i >= (int)FirstVAReg; --i) {
// Create a stack slot
int FI = MFI->CreateFixedObject(4, offset, true);
- if (i == FirstVAReg) {
+ if (i == (int)FirstVAReg) {
XFI->setVarArgsFrameIndex(FI);
}
offset -= StackSlotSize;
@@ -1409,7 +1415,8 @@ SDValue XCoreTargetLowering::PerformDAGCombine(SDNode *N,
// operands are constant canonicalize smallest to RHS.
if ((N0C && !N1C) ||
(N0C && N1C && N0C->getZExtValue() < N1C->getZExtValue()))
- return DAG.getNode(XCoreISD::LMUL, dl, DAG.getVTList(VT, VT), N1, N0, N2, N3);
+ return DAG.getNode(XCoreISD::LMUL, dl, DAG.getVTList(VT, VT),
+ N1, N0, N2, N3);
// lmul(x, 0, a, b)
if (N1C && N1C->isNullValue()) {
@@ -1548,7 +1555,7 @@ static inline bool isImmUs4(int64_t val)
/// by AM is legal for this target, for a load/store of the specified type.
bool
XCoreTargetLowering::isLegalAddressingMode(const AddrMode &AM,
- const Type *Ty) const {
+ Type *Ty) const {
if (Ty->getTypeID() == Type::VoidTyID)
return AM.Scale == 0 && isImmUs(AM.BaseOffs) && isImmUs4(AM.BaseOffs);
diff --git a/contrib/llvm/lib/Target/XCore/XCoreISelLowering.h b/contrib/llvm/lib/Target/XCore/XCoreISelLowering.h
index 9c803be..d6c5b32 100644
--- a/contrib/llvm/lib/Target/XCore/XCoreISelLowering.h
+++ b/contrib/llvm/lib/Target/XCore/XCoreISelLowering.h
@@ -101,7 +101,7 @@ namespace llvm {
MachineBasicBlock *MBB) const;
virtual bool isLegalAddressingMode(const AddrMode &AM,
- const Type *Ty) const;
+ Type *Ty) const;
private:
const XCoreTargetMachine &TM;
@@ -145,7 +145,8 @@ namespace llvm {
SDValue LowerUMUL_LOHI(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerSMUL_LOHI(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const;
- SDValue LowerTRAMPOLINE(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerINIT_TRAMPOLINE(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerADJUST_TRAMPOLINE(SDValue Op, SelectionDAG &DAG) const;
// Inline asm support
std::pair<unsigned, const TargetRegisterClass*>
diff --git a/contrib/llvm/lib/Target/XCore/XCoreInstrInfo.cpp b/contrib/llvm/lib/Target/XCore/XCoreInstrInfo.cpp
index f90481f..a0946a1 100644
--- a/contrib/llvm/lib/Target/XCore/XCoreInstrInfo.cpp
+++ b/contrib/llvm/lib/Target/XCore/XCoreInstrInfo.cpp
@@ -17,11 +17,10 @@
#include "llvm/MC/MCContext.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
-#include "llvm/CodeGen/MachineLocation.h"
-#include "llvm/Target/TargetRegistry.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/TargetRegistry.h"
#define GET_INSTRINFO_CTOR
#include "XCoreGenInstrInfo.inc"
@@ -387,6 +386,15 @@ void XCoreInstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB,
.addImm(0);
}
+MachineInstr*
+XCoreInstrInfo::emitFrameIndexDebugValue(MachineFunction &MF, int FrameIx,
+ uint64_t Offset, const MDNode *MDPtr,
+ DebugLoc DL) const {
+ MachineInstrBuilder MIB = BuildMI(MF, DL, get(XCore::DBG_VALUE))
+ .addFrameIndex(FrameIx).addImm(0).addImm(Offset).addMetadata(MDPtr);
+ return &*MIB;
+}
+
/// ReverseBranchCondition - Return the inverse opcode of the
/// specified Branch instruction.
bool XCoreInstrInfo::
diff --git a/contrib/llvm/lib/Target/XCore/XCoreInstrInfo.h b/contrib/llvm/lib/Target/XCore/XCoreInstrInfo.h
index 840b1e1..d354802 100644
--- a/contrib/llvm/lib/Target/XCore/XCoreInstrInfo.h
+++ b/contrib/llvm/lib/Target/XCore/XCoreInstrInfo.h
@@ -78,6 +78,11 @@ public:
const TargetRegisterClass *RC,
const TargetRegisterInfo *TRI) const;
+ virtual MachineInstr *emitFrameIndexDebugValue(MachineFunction &MF,
+ int FrameIx,
+ uint64_t Offset,
+ const MDNode *MDPtr,
+ DebugLoc DL) const;
virtual bool ReverseBranchCondition(
SmallVectorImpl<MachineOperand> &Cond) const;
diff --git a/contrib/llvm/lib/Target/XCore/XCoreInstrInfo.td b/contrib/llvm/lib/Target/XCore/XCoreInstrInfo.td
index 55c7527..4d2e93b 100644
--- a/contrib/llvm/lib/Target/XCore/XCoreInstrInfo.td
+++ b/contrib/llvm/lib/Target/XCore/XCoreInstrInfo.td
@@ -572,7 +572,7 @@ def STWDP_lru6 : _FLRU6<(outs), (ins GRRegs:$val, MEMii:$addr),
[(store GRRegs:$val, ADDRdpii:$addr)]>;
//let Uses = [CP] in ..
-let mayLoad = 1, isReMaterializable = 1 in
+let mayLoad = 1, isReMaterializable = 1, neverHasSideEffects = 1 in
defm LDWCP : FRU6_LRU6_cp<"ldw">;
let Uses = [SP] in {
@@ -739,7 +739,7 @@ def LDAP_lu10_ba : _FLU10<(outs),
let isCall=1,
// All calls clobber the link register and the non-callee-saved registers:
-Defs = [R0, R1, R2, R3, R11, LR] in {
+Defs = [R0, R1, R2, R3, R11, LR], Uses = [SP] in {
def BL_u10 : _FU10<
(outs),
(ins calltarget:$target, variable_ops),
@@ -754,7 +754,7 @@ def BL_lu10 : _FLU10<
}
// Two operand short
-// TODO eet, eef, testwct, tsetmr, sext (reg), zext (reg)
+// TODO eet, eef, tsetmr
def NOT : _F2R<(outs GRRegs:$dst), (ins GRRegs:$b),
"not $dst, $b",
[(set GRRegs:$dst, (not GRRegs:$b))]>;
@@ -764,15 +764,25 @@ def NEG : _F2R<(outs GRRegs:$dst), (ins GRRegs:$b),
[(set GRRegs:$dst, (ineg GRRegs:$b))]>;
let Constraints = "$src1 = $dst" in {
-let neverHasSideEffects = 1 in
def SEXT_rus : _FRUS<(outs GRRegs:$dst), (ins GRRegs:$src1, i32imm:$src2),
- "sext $dst, $src2",
- []>;
+ "sext $dst, $src2",
+ [(set GRRegs:$dst, (int_xcore_sext GRRegs:$src1,
+ immBitp:$src2))]>;
+
+def SEXT_2r : _FRUS<(outs GRRegs:$dst), (ins GRRegs:$src1, GRRegs:$src2),
+ "sext $dst, $src2",
+ [(set GRRegs:$dst, (int_xcore_sext GRRegs:$src1,
+ GRRegs:$src2))]>;
-let neverHasSideEffects = 1 in
def ZEXT_rus : _FRUS<(outs GRRegs:$dst), (ins GRRegs:$src1, i32imm:$src2),
- "zext $dst, $src2",
- []>;
+ "zext $dst, $src2",
+ [(set GRRegs:$dst, (int_xcore_zext GRRegs:$src1,
+ immBitp:$src2))]>;
+
+def ZEXT_2r : _FRUS<(outs GRRegs:$dst), (ins GRRegs:$src1, GRRegs:$src2),
+ "zext $dst, $src2",
+ [(set GRRegs:$dst, (int_xcore_zext GRRegs:$src1,
+ GRRegs:$src2))]>;
def ANDNOT_2r : _F2R<(outs GRRegs:$dst), (ins GRRegs:$src1, GRRegs:$src2),
"andnot $dst, $src2",
@@ -819,7 +829,8 @@ def OUT_2r : _F2R<(outs), (ins GRRegs:$r, GRRegs:$val),
let Constraints = "$src = $dst" in
def OUTSHR_2r : _F2R<(outs GRRegs:$dst), (ins GRRegs:$r, GRRegs:$src),
"outshr res[$r], $src",
- [(set GRRegs:$dst, (int_xcore_outshr GRRegs:$r, GRRegs:$src))]>;
+ [(set GRRegs:$dst, (int_xcore_outshr GRRegs:$r,
+ GRRegs:$src))]>;
def INCT_2r : _F2R<(outs GRRegs:$dst), (ins GRRegs:$r),
"inct $dst, res[$r]",
@@ -836,7 +847,8 @@ def IN_2r : _F2R<(outs GRRegs:$dst), (ins GRRegs:$r),
let Constraints = "$src = $dst" in
def INSHR_2r : _F2R<(outs GRRegs:$dst), (ins GRRegs:$r, GRRegs:$src),
"inshr $dst, res[$r]",
- [(set GRRegs:$dst, (int_xcore_inshr GRRegs:$r, GRRegs:$src))]>;
+ [(set GRRegs:$dst, (int_xcore_inshr GRRegs:$r,
+ GRRegs:$src))]>;
def CHKCT_2r : _F2R<(outs), (ins GRRegs:$r, GRRegs:$val),
"chkct res[$r], $val",
@@ -846,6 +858,14 @@ def CHKCT_rus : _F2R<(outs), (ins GRRegs:$r, i32imm:$val),
"chkct res[$r], $val",
[(int_xcore_chkct GRRegs:$r, immUs:$val)]>;
+def TESTCT_2r : _F2R<(outs GRRegs:$dst), (ins GRRegs:$src),
+ "testct $dst, res[$src]",
+ [(set GRRegs:$dst, (int_xcore_testct GRRegs:$src))]>;
+
+def TESTWCT_2r : _F2R<(outs GRRegs:$dst), (ins GRRegs:$src),
+ "testwct $dst, res[$src]",
+ [(set GRRegs:$dst, (int_xcore_testwct GRRegs:$src))]>;
+
def SETD_2r : _F2R<(outs), (ins GRRegs:$r, GRRegs:$val),
"setd res[$r], $val",
[(int_xcore_setd GRRegs:$r, GRRegs:$val)]>;
@@ -871,7 +891,6 @@ def INITDP_2r : _F2R<(outs), (ins GRRegs:$t, GRRegs:$src),
[(int_xcore_initdp GRRegs:$t, GRRegs:$src)]>;
// Two operand long
-// TODO endin, peek,
// getd, testlcl
def BITREV_l2r : _FL2R<(outs GRRegs:$dst), (ins GRRegs:$src),
"bitrev $dst, $src",
@@ -917,6 +936,14 @@ def SETPSC_l2r : _FL2R<(outs), (ins GRRegs:$src1, GRRegs:$src2),
"setpsc res[$src1], $src2",
[(int_xcore_setpsc GRRegs:$src1, GRRegs:$src2)]>;
+def PEEK_l2r : _FL2R<(outs GRRegs:$dst), (ins GRRegs:$src),
+ "peek $dst, res[$src]",
+ [(set GRRegs:$dst, (int_xcore_peek GRRegs:$src))]>;
+
+def ENDIN_l2r : _FL2R<(outs GRRegs:$dst), (ins GRRegs:$src),
+ "endin $dst, res[$src]",
+ [(set GRRegs:$dst, (int_xcore_endin GRRegs:$src))]>;
+
// One operand short
// TODO edu, eeu, waitet, waitef, tstart, clrtp
// setdp, setcp, setev, kcall
@@ -960,7 +987,7 @@ def ECALLF_1r : _F1R<(outs), (ins GRRegs:$src),
let isCall=1,
// All calls clobber the link register and the non-callee-saved registers:
-Defs = [R0, R1, R2, R3, R11, LR] in {
+Defs = [R0, R1, R2, R3, R11, LR], Uses = [SP] in {
def BLA_1r : _F1R<(outs), (ins GRRegs:$addr, variable_ops),
"bla $addr",
[(XCoreBranchLink GRRegs:$addr)]>;
@@ -974,10 +1001,15 @@ def FREER_1r : _F1R<(outs), (ins GRRegs:$r),
"freer res[$r]",
[(int_xcore_freer GRRegs:$r)]>;
-let Uses=[R11] in
+let Uses=[R11] in {
def SETV_1r : _F1R<(outs), (ins GRRegs:$r),
- "setv res[$r], r11",
- [(int_xcore_setv GRRegs:$r, R11)]>;
+ "setv res[$r], r11",
+ [(int_xcore_setv GRRegs:$r, R11)]>;
+
+def SETEV_1r : _F1R<(outs), (ins GRRegs:$r),
+ "setev res[$r], r11",
+ [(int_xcore_setev GRRegs:$r, R11)]>;
+}
def EEU_1r : _F1R<(outs), (ins GRRegs:$r),
"eeu res[$r]",
@@ -985,15 +1017,24 @@ def EEU_1r : _F1R<(outs), (ins GRRegs:$r),
// Zero operand short
// TODO freet, ldspc, stspc, ldssr, stssr, ldsed, stsed,
-// stet, geted, getet, getkep, getksp, setkep, getid, kret, dcall, dret,
+// stet, getkep, getksp, setkep, getid, kret, dcall, dret,
// dentsp, drestsp
def CLRE_0R : _F0R<(outs), (ins), "clre", [(int_xcore_clre)]>;
-let Defs = [R11] in
+let Defs = [R11] in {
def GETID_0R : _F0R<(outs), (ins),
- "get r11, id",
- [(set R11, (int_xcore_getid))]>;
+ "get r11, id",
+ [(set R11, (int_xcore_getid))]>;
+
+def GETED_0R : _F0R<(outs), (ins),
+ "get r11, ed",
+ [(set R11, (int_xcore_geted))]>;
+
+def GETET_0R : _F0R<(outs), (ins),
+ "get r11, et",
+ [(set R11, (int_xcore_getet))]>;
+}
def SSYNC_0r : _F0R<(outs), (ins),
"ssync",
diff --git a/contrib/llvm/lib/Target/XCore/XCoreRegisterInfo.cpp b/contrib/llvm/lib/Target/XCore/XCoreRegisterInfo.cpp
index 357a4a0..1b78b37 100644
--- a/contrib/llvm/lib/Target/XCore/XCoreRegisterInfo.cpp
+++ b/contrib/llvm/lib/Target/XCore/XCoreRegisterInfo.cpp
@@ -17,7 +17,6 @@
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
-#include "llvm/CodeGen/MachineLocation.h"
#include "llvm/CodeGen/MachineModuleInfo.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/RegisterScavenging.h"
@@ -39,7 +38,7 @@
using namespace llvm;
XCoreRegisterInfo::XCoreRegisterInfo(const TargetInstrInfo &tii)
- : XCoreGenRegisterInfo(), TII(tii) {
+ : XCoreGenRegisterInfo(XCore::LR), TII(tii) {
}
// helper functions
@@ -321,20 +320,8 @@ loadConstant(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
BuildMI(MBB, I, dl, TII.get(Opcode), DstReg).addImm(Value);
}
-int XCoreRegisterInfo::getDwarfRegNum(unsigned RegNum, bool isEH) const {
- return XCoreGenRegisterInfo::getDwarfRegNumFull(RegNum, 0);
-}
-
-int XCoreRegisterInfo::getLLVMRegNum(unsigned DwarfRegNo, bool isEH) const {
- return XCoreGenRegisterInfo::getLLVMRegNumFull(DwarfRegNo,0);
-}
-
unsigned XCoreRegisterInfo::getFrameRegister(const MachineFunction &MF) const {
const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering();
return TFI->hasFP(MF) ? XCore::R10 : XCore::SP;
}
-
-unsigned XCoreRegisterInfo::getRARegister() const {
- return XCore::LR;
-}
diff --git a/contrib/llvm/lib/Target/XCore/XCoreRegisterInfo.h b/contrib/llvm/lib/Target/XCore/XCoreRegisterInfo.h
index 801d9eb..5c28f39 100644
--- a/contrib/llvm/lib/Target/XCore/XCoreRegisterInfo.h
+++ b/contrib/llvm/lib/Target/XCore/XCoreRegisterInfo.h
@@ -60,7 +60,6 @@ public:
int SPAdj, RegScavenger *RS = NULL) const;
// Debug information queries.
- unsigned getRARegister() const;
unsigned getFrameRegister(const MachineFunction &MF) const;
//! Return the array of argument passing registers
@@ -74,10 +73,6 @@ public:
//! Return whether to emit frame moves
static bool needsFrameMoves(const MachineFunction &MF);
-
- //! Get DWARF debugging register number
- int getDwarfRegNum(unsigned RegNum, bool isEH) const;
- int getLLVMRegNum(unsigned RegNum, bool isEH) const;
};
} // end namespace llvm
diff --git a/contrib/llvm/lib/Target/XCore/XCoreSubtarget.cpp b/contrib/llvm/lib/Target/XCore/XCoreSubtarget.cpp
index ad069bf..b4e9927 100644
--- a/contrib/llvm/lib/Target/XCore/XCoreSubtarget.cpp
+++ b/contrib/llvm/lib/Target/XCore/XCoreSubtarget.cpp
@@ -13,7 +13,7 @@
#include "XCoreSubtarget.h"
#include "XCore.h"
-#include "llvm/Target/TargetRegistry.h"
+#include "llvm/Support/TargetRegistry.h"
#define GET_SUBTARGETINFO_TARGET_DESC
#define GET_SUBTARGETINFO_CTOR
diff --git a/contrib/llvm/lib/Target/XCore/XCoreTargetMachine.cpp b/contrib/llvm/lib/Target/XCore/XCoreTargetMachine.cpp
index 342966a..fdc5d35 100644
--- a/contrib/llvm/lib/Target/XCore/XCoreTargetMachine.cpp
+++ b/contrib/llvm/lib/Target/XCore/XCoreTargetMachine.cpp
@@ -14,15 +14,15 @@
#include "XCore.h"
#include "llvm/Module.h"
#include "llvm/PassManager.h"
-#include "llvm/Target/TargetRegistry.h"
+#include "llvm/Support/TargetRegistry.h"
using namespace llvm;
/// XCoreTargetMachine ctor - Create an ILP32 architecture model
///
-XCoreTargetMachine::XCoreTargetMachine(const Target &T, const std::string &TT,
- const std::string &CPU,
- const std::string &FS)
- : LLVMTargetMachine(T, TT, CPU, FS),
+XCoreTargetMachine::XCoreTargetMachine(const Target &T, StringRef TT,
+ StringRef CPU, StringRef FS,
+ Reloc::Model RM, CodeModel::Model CM)
+ : LLVMTargetMachine(T, TT, CPU, FS, RM, CM),
Subtarget(TT, CPU, FS),
DataLayout("e-p:32:32:32-a0:0:32-f32:32:32-f64:32:32-i1:8:32-i8:8:32-"
"i16:16:32-i32:32:32-i64:32:32-n32"),
diff --git a/contrib/llvm/lib/Target/XCore/XCoreTargetMachine.h b/contrib/llvm/lib/Target/XCore/XCoreTargetMachine.h
index 6235ac3..83d09d6d 100644
--- a/contrib/llvm/lib/Target/XCore/XCoreTargetMachine.h
+++ b/contrib/llvm/lib/Target/XCore/XCoreTargetMachine.h
@@ -32,8 +32,9 @@ class XCoreTargetMachine : public LLVMTargetMachine {
XCoreTargetLowering TLInfo;
XCoreSelectionDAGInfo TSInfo;
public:
- XCoreTargetMachine(const Target &T, const std::string &TT,
- const std::string &CPU, const std::string &FS);
+ XCoreTargetMachine(const Target &T, StringRef TT,
+ StringRef CPU, StringRef FS,
+ Reloc::Model RM, CodeModel::Model CM);
virtual const XCoreInstrInfo *getInstrInfo() const { return &InstrInfo; }
virtual const XCoreFrameLowering *getFrameLowering() const {
OpenPOWER on IntegriCloud